xref: /openbmc/linux/fs/inode.c (revision 3461e3bf)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * (C) 1997 Linus Torvalds
44b4563dcSChristoph Hellwig  * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
51da177e4SLinus Torvalds  */
6e59cc473SAl Viro #include <linux/export.h>
71da177e4SLinus Torvalds #include <linux/fs.h>
85970e15dSJeff Layton #include <linux/filelock.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/backing-dev.h>
111da177e4SLinus Torvalds #include <linux/hash.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/security.h>
141da177e4SLinus Torvalds #include <linux/cdev.h>
1557c8a661SMike Rapoport #include <linux/memblock.h>
163be25f49SEric Paris #include <linux/fsnotify.h>
17fc33a7bbSChristoph Hellwig #include <linux/mount.h>
18f19d4a8fSAl Viro #include <linux/posix_acl.h>
194b4563dcSChristoph Hellwig #include <linux/buffer_head.h> /* for inode_has_buffers */
207ada4db8SMiklos Szeredi #include <linux/ratelimit.h>
21bc3b14cbSDave Chinner #include <linux/list_lru.h>
22ae5e165dSJeff Layton #include <linux/iversion.h>
230ae45f63STheodore Ts'o #include <trace/events/writeback.h>
24a66979abSDave Chinner #include "internal.h"
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds /*
274b4563dcSChristoph Hellwig  * Inode locking rules:
28250df6edSDave Chinner  *
29250df6edSDave Chinner  * inode->i_lock protects:
3010e14073SJchao Sun  *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
31bc3b14cbSDave Chinner  * Inode LRU list locks protect:
3298b745c6SDave Chinner  *   inode->i_sb->s_inode_lru, inode->i_lru
3374278da9SDave Chinner  * inode->i_sb->s_inode_list_lock protects:
3474278da9SDave Chinner  *   inode->i_sb->s_inodes, inode->i_sb_list
35f758eeabSChristoph Hellwig  * bdi->wb.list_lock protects:
36c7f54084SDave Chinner  *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
3767a23c49SDave Chinner  * inode_hash_lock protects:
3867a23c49SDave Chinner  *   inode_hashtable, inode->i_hash
39250df6edSDave Chinner  *
40250df6edSDave Chinner  * Lock ordering:
4155fa6091SDave Chinner  *
4274278da9SDave Chinner  * inode->i_sb->s_inode_list_lock
4355fa6091SDave Chinner  *   inode->i_lock
44bc3b14cbSDave Chinner  *     Inode LRU list locks
45a66979abSDave Chinner  *
46f758eeabSChristoph Hellwig  * bdi->wb.list_lock
47a66979abSDave Chinner  *   inode->i_lock
4867a23c49SDave Chinner  *
4967a23c49SDave Chinner  * inode_hash_lock
5074278da9SDave Chinner  *   inode->i_sb->s_inode_list_lock
5167a23c49SDave Chinner  *   inode->i_lock
5267a23c49SDave Chinner  *
5367a23c49SDave Chinner  * iunique_lock
5467a23c49SDave Chinner  *   inode_hash_lock
55250df6edSDave Chinner  */
56250df6edSDave Chinner 
57fa3536ccSEric Dumazet static unsigned int i_hash_mask __read_mostly;
58fa3536ccSEric Dumazet static unsigned int i_hash_shift __read_mostly;
5967a23c49SDave Chinner static struct hlist_head *inode_hashtable __read_mostly;
6067a23c49SDave Chinner static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
611da177e4SLinus Torvalds 
/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 * Installed as the default ->a_ops for every new inode in
 * inode_init_always() below.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
697dcda1c9SJens Axboe 
703942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_inodes);
713942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_unused);
72cffbc8aaSDave Chinner 
73e18b890bSChristoph Lameter static struct kmem_cache *inode_cachep __read_mostly;
741da177e4SLinus Torvalds 
get_nr_inodes(void)753942c07cSGlauber Costa static long get_nr_inodes(void)
76cffbc8aaSDave Chinner {
773e880fb5SNick Piggin 	int i;
783942c07cSGlauber Costa 	long sum = 0;
793e880fb5SNick Piggin 	for_each_possible_cpu(i)
803e880fb5SNick Piggin 		sum += per_cpu(nr_inodes, i);
813e880fb5SNick Piggin 	return sum < 0 ? 0 : sum;
82cffbc8aaSDave Chinner }
83cffbc8aaSDave Chinner 
get_nr_inodes_unused(void)843942c07cSGlauber Costa static inline long get_nr_inodes_unused(void)
85cffbc8aaSDave Chinner {
86fcb94f72SDave Chinner 	int i;
873942c07cSGlauber Costa 	long sum = 0;
88fcb94f72SDave Chinner 	for_each_possible_cpu(i)
89fcb94f72SDave Chinner 		sum += per_cpu(nr_unused, i);
90fcb94f72SDave Chinner 	return sum < 0 ? 0 : sum;
91cffbc8aaSDave Chinner }
92cffbc8aaSDave Chinner 
long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();

	if (nr_dirty < 0)
		return 0;
	return nr_dirty;
}
99cffbc8aaSDave Chinner 
100cffbc8aaSDave Chinner /*
101cffbc8aaSDave Chinner  * Handle nr_inode sysctl
102cffbc8aaSDave Chinner  */
103cffbc8aaSDave Chinner #ifdef CONFIG_SYSCTL
/*
 * Statistics gathering..
 */
/* Snapshot exported through /proc/sys/fs; refreshed on every access below. */
static struct inodes_stat_t inodes_stat;

/*
 * sysctl handler for fs/inode-nr and fs/inode-state: refresh the
 * snapshot from the per-cpu counters, then defer to the generic
 * unsigned-long vector handler for the actual read.
 */
static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
1161d67fe58SLuis Chamberlain 
/* Read-only sysctl tables under /proc/sys/fs exposing inode statistics. */
static struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		/* Expose only the first two longs (nr_inodes, nr_unused). */
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		/* 7 longs — presumably the full inodes_stat_t layout; confirm. */
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{ }	/* sentinel terminating the table */
};
1341d67fe58SLuis Chamberlain 
/* Register the tables above under /proc/sys/fs early during boot. */
static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
141cffbc8aaSDave Chinner #endif
142cffbc8aaSDave Chinner 
/*
 * Default ->open() for inodes that have no real file operations wired
 * up (see inode_init_always()): opening them always fails with -ENXIO.
 */
static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}
147bd9b51e7SAl Viro 
/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 *
 * Return: 0 on success, -ENOMEM if allocating the security blob fails.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	/* Default fops: ->open() fails with -ENXIO until the fs installs real ones. */
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	/* Cache "this sb supports xattrs" as an inode opflag. */
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	/* Foreign-writeback detection state starts from a clean slate. */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	spin_lock_init(&inode->i_lock);
	/* Per-fs-type lockdep classes so different filesystems don't alias. */
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	/* Propagate the superblock's stable-writes requirement to the mapping. */
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	/* Only failure point: allocating the LSM security blob. */
	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);
2392cb1599fSDavid Chinner 
/*
 * Release an inode straight back to the generic inode slab cache,
 * without deferring to an RCU grace period.
 */
void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);
245fdb0da89SAl Viro 
/*
 * RCU callback performing the deferred free of an inode: prefer the
 * filesystem's ->free_inode() recorded in inode->free_inode, otherwise
 * return the object to the generic slab cache.
 */
static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	if (inode->free_inode)
		inode->free_inode(inode);
	else
		free_inode_nonrcu(inode);
}
254fdb0da89SAl Viro 
/*
 * Allocate and initialise a new inode for @sb, via the filesystem's
 * ->alloc_inode() if provided, else from the generic inode_cachep slab.
 *
 * If inode_init_always() fails, the partially initialised inode is
 * torn down again: ->destroy_inode() runs first if present; freeing
 * then goes synchronously through i_callback() (which picks
 * ->free_inode() or the slab) unless ->destroy_inode() already freed
 * it.  Returns NULL on any failure.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			/* No ->free_inode: ->destroy_inode owned the freeing. */
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		/* Call the RCU callback directly — nobody else can see this inode yet. */
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}
2811da177e4SLinus Torvalds 
/*
 * Undo everything inode_init_always() and later use set up, common to
 * all inodes: writeback association, security blob, fsnotify marks,
 * file-lock context, cached ACLs, and the s_remove_count accounting
 * for inodes that died with i_nlink == 0.  Does not free the inode
 * memory itself.
 */
void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	if (!inode->i_nlink) {
		/* Balance the increment done in drop_nlink()/clear_nlink(). */
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	/* Release only real cached ACLs, not the ACL_NOT_CACHED/uncached sentinels. */
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
3032e00c97eSChristoph Hellwig 
/*
 * Tear down and free an inode.  The filesystem's ->destroy_inode()
 * runs synchronously; the memory itself is released after an RCU
 * grace period via i_callback(), unless ->destroy_inode() handled
 * freeing entirely (fs provides no ->free_inode).
 */
static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		if (!ops->free_inode)
			return;
	}
	/* Record which free routine i_callback() should use after the grace period. */
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}
3181da177e4SLinus Torvalds 
3197ada4db8SMiklos Szeredi /**
3207ada4db8SMiklos Szeredi  * drop_nlink - directly drop an inode's link count
3217ada4db8SMiklos Szeredi  * @inode: inode
3227ada4db8SMiklos Szeredi  *
3237ada4db8SMiklos Szeredi  * This is a low-level filesystem helper to replace any
3247ada4db8SMiklos Szeredi  * direct filesystem manipulation of i_nlink.  In cases
3257ada4db8SMiklos Szeredi  * where we are attempting to track writes to the
3267ada4db8SMiklos Szeredi  * filesystem, a decrement to zero means an imminent
3277ada4db8SMiklos Szeredi  * write when the file is truncated and actually unlinked
3287ada4db8SMiklos Szeredi  * on the filesystem.
3297ada4db8SMiklos Szeredi  */
drop_nlink(struct inode * inode)3307ada4db8SMiklos Szeredi void drop_nlink(struct inode *inode)
3317ada4db8SMiklos Szeredi {
3327ada4db8SMiklos Szeredi 	WARN_ON(inode->i_nlink == 0);
3337ada4db8SMiklos Szeredi 	inode->__i_nlink--;
3347ada4db8SMiklos Szeredi 	if (!inode->i_nlink)
3357ada4db8SMiklos Szeredi 		atomic_long_inc(&inode->i_sb->s_remove_count);
3367ada4db8SMiklos Szeredi }
3377ada4db8SMiklos Szeredi EXPORT_SYMBOL(drop_nlink);
3387ada4db8SMiklos Szeredi 
3397ada4db8SMiklos Szeredi /**
3407ada4db8SMiklos Szeredi  * clear_nlink - directly zero an inode's link count
3417ada4db8SMiklos Szeredi  * @inode: inode
3427ada4db8SMiklos Szeredi  *
3437ada4db8SMiklos Szeredi  * This is a low-level filesystem helper to replace any
3447ada4db8SMiklos Szeredi  * direct filesystem manipulation of i_nlink.  See
3457ada4db8SMiklos Szeredi  * drop_nlink() for why we care about i_nlink hitting zero.
3467ada4db8SMiklos Szeredi  */
clear_nlink(struct inode * inode)3477ada4db8SMiklos Szeredi void clear_nlink(struct inode *inode)
3487ada4db8SMiklos Szeredi {
3497ada4db8SMiklos Szeredi 	if (inode->i_nlink) {
3507ada4db8SMiklos Szeredi 		inode->__i_nlink = 0;
3517ada4db8SMiklos Szeredi 		atomic_long_inc(&inode->i_sb->s_remove_count);
3527ada4db8SMiklos Szeredi 	}
3537ada4db8SMiklos Szeredi }
3547ada4db8SMiklos Szeredi EXPORT_SYMBOL(clear_nlink);
3557ada4db8SMiklos Szeredi 
3567ada4db8SMiklos Szeredi /**
3577ada4db8SMiklos Szeredi  * set_nlink - directly set an inode's link count
3587ada4db8SMiklos Szeredi  * @inode: inode
3597ada4db8SMiklos Szeredi  * @nlink: new nlink (should be non-zero)
3607ada4db8SMiklos Szeredi  *
3617ada4db8SMiklos Szeredi  * This is a low-level filesystem helper to replace any
3627ada4db8SMiklos Szeredi  * direct filesystem manipulation of i_nlink.
3637ada4db8SMiklos Szeredi  */
set_nlink(struct inode * inode,unsigned int nlink)3647ada4db8SMiklos Szeredi void set_nlink(struct inode *inode, unsigned int nlink)
3657ada4db8SMiklos Szeredi {
3667ada4db8SMiklos Szeredi 	if (!nlink) {
3677ada4db8SMiklos Szeredi 		clear_nlink(inode);
3687ada4db8SMiklos Szeredi 	} else {
3697ada4db8SMiklos Szeredi 		/* Yes, some filesystems do change nlink from zero to one */
3707ada4db8SMiklos Szeredi 		if (inode->i_nlink == 0)
3717ada4db8SMiklos Szeredi 			atomic_long_dec(&inode->i_sb->s_remove_count);
3727ada4db8SMiklos Szeredi 
3737ada4db8SMiklos Szeredi 		inode->__i_nlink = nlink;
3747ada4db8SMiklos Szeredi 	}
3757ada4db8SMiklos Szeredi }
3767ada4db8SMiklos Szeredi EXPORT_SYMBOL(set_nlink);
3777ada4db8SMiklos Szeredi 
/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with dec_nlink().
 */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		/* Resurrecting a zero-nlink inode is only expected under I_LINKABLE. */
		WARN_ON(!(inode->i_state & I_LINKABLE));
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);
3967ada4db8SMiklos Szeredi 
/*
 * Initialise the address_space fields shared by both callers: the
 * embedded i_data path (inode_init_once()) and standalone mappings
 * (address_space_init_once()).
 */
static void __address_space_init_once(struct address_space *mapping)
{
	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
	init_rwsem(&mapping->i_mmap_rwsem);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	mapping->i_mmap = RB_ROOT_CACHED;
}
405ae23395dSDave Chinner 
/*
 * Fully (re)initialise a standalone address_space: zero everything,
 * then set up the locks and lists via the shared helper.
 */
void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);
4122aa15890SMiklos Szeredi 
/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 * (Used as the slab constructor via init_once() below.)
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_LIST_HEAD(&inode->i_sb_list);
	__address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
4311da177e4SLinus Torvalds 
init_once(void * foo)43251cc5068SAlexey Dobriyan static void init_once(void *foo)
4331da177e4SLinus Torvalds {
4341da177e4SLinus Torvalds 	struct inode *inode = (struct inode *) foo;
4351da177e4SLinus Torvalds 
4361da177e4SLinus Torvalds 	inode_init_once(inode);
4371da177e4SLinus Torvalds }
4381da177e4SLinus Torvalds 
/*
 * inode->i_lock must be held
 */
void __iget(struct inode *inode)
{
	/* Bare refcount bump; see the locking rules at the top of this file. */
	atomic_inc(&inode->i_count);
}
4461da177e4SLinus Torvalds 
/*
 * get additional reference to inode; caller must already hold one.
 */
void ihold(struct inode *inode)
{
	/* A 0->1 transition here would mean the caller's reference was bogus. */
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);
4557de9c6eeSAl Viro 
/*
 * Try to put an unused, clean inode on its superblock's LRU.
 *
 * Bail out for inodes that are dirty/syncing/freeing, still
 * referenced, on an inactive superblock, or whose page cache pins
 * them (!mapping_shrinkable()).  If list_lru_add() declines (the
 * inode was already on the list) and @rotate is set, mark the inode
 * I_REFERENCED instead.
 */
static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
		return;
	if (atomic_read(&inode->i_count) ||
	    !(inode->i_sb->s_flags & SB_ACTIVE) ||
	    !mapping_shrinkable(&inode->i_data))
		return;

	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}
4729e38d86fSNick Piggin 
/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
 * Needs inode->i_lock held.
 */
void inode_add_lru(struct inode *inode)
{
	/* Non-rotating variant: never sets I_REFERENCED. */
	__inode_add_lru(inode, false);
}
4824eff96ddSJan Kara 
/* Remove the inode from its sb's LRU, fixing up the nr_unused counter. */
static void inode_lru_list_del(struct inode *inode)
{
	/* Decrement only if the inode was actually on the list. */
	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}
4881da177e4SLinus Torvalds 
/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 *
 * Takes s_inode_list_lock; see the lock ordering rules at the top of
 * this file.
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);
500646ec461SChristoph Hellwig 
/* Remove the inode from its superblock's s_inodes list, if it is on one. */
static inline void inode_sb_list_del(struct inode *inode)
{
	/* The list_empty() check avoids taking the lock for inodes never added. */
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}
509646ec461SChristoph Hellwig 
/*
 * Fold an (sb, hashval) pair into an index into inode_hashtable.
 * NOTE: '/' binds tighter than '^', so the division by L1_CACHE_BYTES
 * applies only to the (GOLDEN_RATIO_PRIME + hashval) term.
 */
static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
	return tmp & i_hash_mask;
}
5194c51acbcSDave Chinner 
/**
 *	__insert_inode_hash - hash an inode
 *	@inode: unhashed inode
 *	@hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 *	Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	/* Lock order: inode_hash_lock before inode->i_lock (see top of file). */
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* RCU-safe insertion: hash walkers may be running locklessly. */
	hlist_add_head_rcu(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
5394c51acbcSDave Chinner 
/**
 *	__remove_inode_hash - remove an inode from the hash
 *	@inode: inode to unhash
 *
 *	Remove an inode from the superblock.
 */
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* _init variant re-initialises the node so it reads as unhashed afterwards. */
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);
5554c51acbcSDave Chinner 
/*
 * Best-effort debug dump of an address_space for diagnostic paths.
 * Never dereferences @mapping (or anything hanging off it) directly:
 * every field is probed with get_kernel_nofault(), so a corrupt
 * pointer degrades the output instead of crashing.
 */
void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything depending on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	/* Copy the first aliasing dentry into a local so %pd can't fault. */
	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr)) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	/*
	 * if dentry is corrupted, the %pd handler may still crash,
	 * but it's unlikely that we reach here with a corrupt mapping
	 */
	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
}
6043e9d80a8SMatthew Wilcox (Oracle) 
/*
 * clear_inode - final teardown of an inode's page-cache state
 * @inode: inode being evicted (must already have I_FREEING set)
 *
 * Verifies that the mapping, private buffer list and writeback list are
 * empty, then transitions i_state to I_FREEING | I_CLEAR.  Called from
 * evict() when the filesystem has no ->evict_inode method; filesystems
 * presumably call it from their own eviction paths too — the I_FREEING
 * precondition is enforced by the BUG_ON below either way.
 */
void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __filemap_remove_folio())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * two known and long-standing ways in which nodes may get left behind
	 * (when deep radix-tree node allocation failed partway; or when THP
	 * collapse_file() failed). Until those two known cases are cleaned up,
	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
	 * nor even WARN_ON(!mapping_empty).
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
631b0683aa6SAl Viro 
632b2b2af8eSDave Chinner /*
633b2b2af8eSDave Chinner  * Free the inode passed in, removing it from the lists it is still connected
634b2b2af8eSDave Chinner  * to. We remove any pages still attached to the inode and wait for any IO that
635b2b2af8eSDave Chinner  * is still in progress before finally destroying the inode.
636b2b2af8eSDave Chinner  *
637b2b2af8eSDave Chinner  * An inode must already be marked I_FREEING so that we avoid the inode being
638b2b2af8eSDave Chinner  * moved back onto lists if we race with other code that manipulates the lists
639b2b2af8eSDave Chinner  * (e.g. writeback_single_inode). The caller is responsible for setting this.
640b2b2af8eSDave Chinner  *
641b2b2af8eSDave Chinner  * An inode must already be removed from the LRU list before being evicted from
642b2b2af8eSDave Chinner  * the cache. This should occur atomically with setting the I_FREEING state
643b2b2af8eSDave Chinner  * flag, so no inodes here should ever be on the LRU when being evicted.
644b2b2af8eSDave Chinner  */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	/* Caller must have set I_FREEING and taken the inode off the LRU. */
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode.  We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	/* Filesystem-specific teardown if provided, generic one otherwise. */
	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/* Wake waiters on __I_NEW (see __wait_on_freeing_inode()). */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
683b4272d4cSAl Viro 
6841da177e4SLinus Torvalds /*
6851da177e4SLinus Torvalds  * dispose_list - dispose of the contents of a local list
6861da177e4SLinus Torvalds  * @head: the head of the list to free
6871da177e4SLinus Torvalds  *
6881da177e4SLinus Torvalds  * Dispose-list gets a local list with local inodes in it, so it doesn't
6891da177e4SLinus Torvalds  * need to worry about list corruption and SMP locks.
6901da177e4SLinus Torvalds  */
dispose_list(struct list_head * head)6911da177e4SLinus Torvalds static void dispose_list(struct list_head *head)
6921da177e4SLinus Torvalds {
6931da177e4SLinus Torvalds 	while (!list_empty(head)) {
6941da177e4SLinus Torvalds 		struct inode *inode;
6951da177e4SLinus Torvalds 
6967ccf19a8SNick Piggin 		inode = list_first_entry(head, struct inode, i_lru);
6977ccf19a8SNick Piggin 		list_del_init(&inode->i_lru);
6981da177e4SLinus Torvalds 
699644da596SAl Viro 		evict(inode);
700ac05fbb4SJosef Bacik 		cond_resched();
7011da177e4SLinus Torvalds 	}
7021da177e4SLinus Torvalds }
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds /**
70563997e98SAl Viro  * evict_inodes	- evict all evictable inodes for a superblock
70663997e98SAl Viro  * @sb:		superblock to operate on
7071da177e4SLinus Torvalds  *
70863997e98SAl Viro  * Make sure that no inodes with zero refcount are retained.  This is
7091751e8a6SLinus Torvalds  * called by superblock shutdown after having SB_ACTIVE flag removed,
71063997e98SAl Viro  * so any inode reaching zero refcount during or after that call will
71163997e98SAl Viro  * be immediately evicted.
71263997e98SAl Viro  */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		/* Inodes that still have users are not evictable here. */
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		/* Skip inodes whose setup or teardown someone else owns. */
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);
75263997e98SAl Viro 
75363997e98SAl Viro /**
754a0318786SChristoph Hellwig  * invalidate_inodes	- attempt to free all inodes on a superblock
755a0318786SChristoph Hellwig  * @sb:		superblock to operate on
756a0318786SChristoph Hellwig  *
757e127b9bcSChristoph Hellwig  * Attempts to free all inodes (including dirty inodes) for a given superblock.
7581da177e4SLinus Torvalds  */
void invalidate_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		/* Skip inodes whose setup or teardown someone else owns. */
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Busy inodes are left alone. */
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		/* Drop the lock periodically so we don't hog the CPU. */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
7921da177e4SLinus Torvalds 
7931da177e4SLinus Torvalds /*
794bc3b14cbSDave Chinner  * Isolate the inode from the LRU in preparation for freeing it.
7951da177e4SLinus Torvalds  *
7969e38d86fSNick Piggin  * If the inode has the I_REFERENCED flag set, then it means that it has been
7979e38d86fSNick Piggin  * used recently - the flag is set in iput_final(). When we encounter such an
7989e38d86fSNick Piggin  * inode, clear the flag and move it to the back of the LRU so it gets another
7999e38d86fSNick Piggin  * pass through the LRU before it gets reclaimed. This is necessary because of
8009e38d86fSNick Piggin  * the fact we are doing lazy LRU updates to minimise lock contention so the
8019e38d86fSNick Piggin  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
8029e38d86fSNick Piggin  * with this flag set because they are the inodes that are out of order.
8031da177e4SLinus Torvalds  */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	/* @arg is the dispose list assembled by prune_icache_sb(). */
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * On highmem systems, mapping_shrinkable() permits dropping
	 * page cache in order to free up struct inodes: lowmem might
	 * be under pressure before the cache inside the highmem zone.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		/* Pin the inode so we can drop both locks while reclaiming. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			mm_account_reclaimed_pages(reap);
		}
		iput(inode);
		spin_lock(lru_lock);
		/* The list may have changed while unlocked; restart the walk. */
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}
870bc3b14cbSDave Chinner 
871bc3b14cbSDave Chinner /*
872bc3b14cbSDave Chinner  * Walk the superblock inode LRU for freeable inodes and attempt to free them.
873bc3b14cbSDave Chinner  * This is called from the superblock shrinker function with a number of inodes
874bc3b14cbSDave Chinner  * to trim from the LRU. Inodes to be freed are moved to a temporary list and
875bc3b14cbSDave Chinner  * then are freed outside inode_lock by dispose_list().
876bc3b14cbSDave Chinner  */
prune_icache_sb(struct super_block * sb,struct shrink_control * sc)877503c358cSVladimir Davydov long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
878bc3b14cbSDave Chinner {
879bc3b14cbSDave Chinner 	LIST_HEAD(freeable);
880bc3b14cbSDave Chinner 	long freed;
881bc3b14cbSDave Chinner 
882503c358cSVladimir Davydov 	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
883503c358cSVladimir Davydov 				     inode_lru_isolate, &freeable);
8841da177e4SLinus Torvalds 	dispose_list(&freeable);
8850a234c6dSDave Chinner 	return freed;
8861da177e4SLinus Torvalds }
8871da177e4SLinus Torvalds 
8881da177e4SLinus Torvalds static void __wait_on_freeing_inode(struct inode *inode);
8891da177e4SLinus Torvalds /*
8901da177e4SLinus Torvalds  * Called with the inode lock held.
8911da177e4SLinus Torvalds  */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Wait out the teardown, then rescan the chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		/* Half-constructed inodes are not usable by other lookups. */
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		/* Take a reference before anyone else can free it. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9201da177e4SLinus Torvalds 
9211da177e4SLinus Torvalds /*
9221da177e4SLinus Torvalds  * find_inode_fast is the fast path version of find_inode, see the comment at
9231da177e4SLinus Torvalds  * iget_locked for details.
9241da177e4SLinus Torvalds  */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		/* Match by inode number and superblock, no test callback. */
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Wait out the teardown, then rescan the chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		/* Half-constructed inodes are not usable by other lookups. */
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		/* Take a reference before anyone else can free it. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9511da177e4SLinus Torvalds 
952f991bd2eSEric Dumazet /*
953f991bd2eSEric Dumazet  * Each cpu owns a range of LAST_INO_BATCH numbers.
954f991bd2eSEric Dumazet  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
955f991bd2eSEric Dumazet  * to renew the exhausted range.
9568290c35fSDavid Chinner  *
957f991bd2eSEric Dumazet  * This does not significantly increase overflow rate because every CPU can
958f991bd2eSEric Dumazet  * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
959f991bd2eSEric Dumazet  * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
960f991bd2eSEric Dumazet  * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
961f991bd2eSEric Dumazet  * overflow rate by 2x, which does not seem too significant.
962f991bd2eSEric Dumazet  *
963f991bd2eSEric Dumazet  * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
964f991bd2eSEric Dumazet  * error if st_ino won't fit in target struct field. Use 32bit counter
965f991bd2eSEric Dumazet  * here to attempt to avoid that.
9668290c35fSDavid Chinner  */
967f991bd2eSEric Dumazet #define LAST_INO_BATCH 1024
968f991bd2eSEric Dumazet static DEFINE_PER_CPU(unsigned int, last_ino);
9698290c35fSDavid Chinner 
get_next_ino(void)97085fe4025SChristoph Hellwig unsigned int get_next_ino(void)
971f991bd2eSEric Dumazet {
972f991bd2eSEric Dumazet 	unsigned int *p = &get_cpu_var(last_ino);
973f991bd2eSEric Dumazet 	unsigned int res = *p;
974f991bd2eSEric Dumazet 
975f991bd2eSEric Dumazet #ifdef CONFIG_SMP
976f991bd2eSEric Dumazet 	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
977f991bd2eSEric Dumazet 		static atomic_t shared_last_ino;
978f991bd2eSEric Dumazet 		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
979f991bd2eSEric Dumazet 
980f991bd2eSEric Dumazet 		res = next - LAST_INO_BATCH;
9818290c35fSDavid Chinner 	}
982f991bd2eSEric Dumazet #endif
983f991bd2eSEric Dumazet 
9842adc376cSCarlos Maiolino 	res++;
9852adc376cSCarlos Maiolino 	/* get_next_ino should not provide a 0 inode number */
9862adc376cSCarlos Maiolino 	if (unlikely(!res))
9872adc376cSCarlos Maiolino 		res++;
9882adc376cSCarlos Maiolino 	*p = res;
989f991bd2eSEric Dumazet 	put_cpu_var(last_ino);
990f991bd2eSEric Dumazet 	return res;
991f991bd2eSEric Dumazet }
99285fe4025SChristoph Hellwig EXPORT_SYMBOL(get_next_ino);
9938290c35fSDavid Chinner 
9941da177e4SLinus Torvalds /**
995a209dfc7SEric Dumazet  *	new_inode_pseudo 	- obtain an inode
996a209dfc7SEric Dumazet  *	@sb: superblock
997a209dfc7SEric Dumazet  *
998a209dfc7SEric Dumazet  *	Allocates a new inode for given superblock.
999a209dfc7SEric Dumazet  *	Inode wont be chained in superblock s_inodes list
1000a209dfc7SEric Dumazet  *	This means :
1001a209dfc7SEric Dumazet  *	- fs can't be unmount
1002a209dfc7SEric Dumazet  *	- quotas, fsnotify, writeback can't work
1003a209dfc7SEric Dumazet  */
new_inode_pseudo(struct super_block * sb)1004a209dfc7SEric Dumazet struct inode *new_inode_pseudo(struct super_block *sb)
1005a209dfc7SEric Dumazet {
1006a209dfc7SEric Dumazet 	struct inode *inode = alloc_inode(sb);
1007a209dfc7SEric Dumazet 
1008a209dfc7SEric Dumazet 	if (inode) {
1009a209dfc7SEric Dumazet 		spin_lock(&inode->i_lock);
1010a209dfc7SEric Dumazet 		inode->i_state = 0;
1011a209dfc7SEric Dumazet 		spin_unlock(&inode->i_lock);
1012a209dfc7SEric Dumazet 	}
1013a209dfc7SEric Dumazet 	return inode;
1014a209dfc7SEric Dumazet }
1015a209dfc7SEric Dumazet 
1016a209dfc7SEric Dumazet /**
10171da177e4SLinus Torvalds  *	new_inode 	- obtain an inode
10181da177e4SLinus Torvalds  *	@sb: superblock
10191da177e4SLinus Torvalds  *
1020769848c0SMel Gorman  *	Allocates a new inode for given superblock. The default gfp_mask
10213c1d4378SHugh Dickins  *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
1022769848c0SMel Gorman  *	If HIGHMEM pages are unsuitable or it is known that pages allocated
1023769848c0SMel Gorman  *	for the page cache are not reclaimable or migratable,
1024769848c0SMel Gorman  *	mapping_set_gfp_mask() must be called with suitable flags on the
1025769848c0SMel Gorman  *	newly created inode's mapping
1026769848c0SMel Gorman  *
10271da177e4SLinus Torvalds  */
new_inode(struct super_block * sb)10281da177e4SLinus Torvalds struct inode *new_inode(struct super_block *sb)
10291da177e4SLinus Torvalds {
10301da177e4SLinus Torvalds 	struct inode *inode;
10311da177e4SLinus Torvalds 
1032a209dfc7SEric Dumazet 	inode = new_inode_pseudo(sb);
1033a209dfc7SEric Dumazet 	if (inode)
103455fa6091SDave Chinner 		inode_sb_list_add(inode);
10351da177e4SLinus Torvalds 	return inode;
10361da177e4SLinus Torvalds }
10371da177e4SLinus Torvalds EXPORT_SYMBOL(new_inode);
10381da177e4SLinus Torvalds 
103914358e6dSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_annotate_inode_mutex_key(struct inode * inode)1040e096d0c7SJosh Boyer void lockdep_annotate_inode_mutex_key(struct inode *inode)
1041e096d0c7SJosh Boyer {
1042a3314a0eSNamhyung Kim 	if (S_ISDIR(inode->i_mode)) {
104314358e6dSPeter Zijlstra 		struct file_system_type *type = inode->i_sb->s_type;
10441e89a5e1SPeter Zijlstra 
10459a7aa12fSJan Kara 		/* Set new key only if filesystem hasn't already changed it */
10469902af79SAl Viro 		if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
104714358e6dSPeter Zijlstra 			/*
104814358e6dSPeter Zijlstra 			 * ensure nobody is actually holding i_mutex
104914358e6dSPeter Zijlstra 			 */
10509902af79SAl Viro 			// mutex_destroy(&inode->i_mutex);
10519902af79SAl Viro 			init_rwsem(&inode->i_rwsem);
10529902af79SAl Viro 			lockdep_set_class(&inode->i_rwsem,
10539a7aa12fSJan Kara 					  &type->i_mutex_dir_key);
10549a7aa12fSJan Kara 		}
10551e89a5e1SPeter Zijlstra 	}
1056e096d0c7SJosh Boyer }
1057e096d0c7SJosh Boyer EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
105814358e6dSPeter Zijlstra #endif
1059e096d0c7SJosh Boyer 
1060e096d0c7SJosh Boyer /**
1061e096d0c7SJosh Boyer  * unlock_new_inode - clear the I_NEW state and wake up any waiters
1062e096d0c7SJosh Boyer  * @inode:	new inode to unlock
1063e096d0c7SJosh Boyer  *
1064e096d0c7SJosh Boyer  * Called when the inode is fully initialised to clear the new state of the
1065e096d0c7SJosh Boyer  * inode and wake up anyone waiting for the inode to finish initialisation.
1066e096d0c7SJosh Boyer  */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/* Order the i_state update before waking __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
10781da177e4SLinus Torvalds 
/*
 * discard_new_inode - drop an inode that failed setup while still I_NEW
 * @inode: the partially set up inode
 *
 * Failure-path counterpart to unlock_new_inode(): clears I_NEW, wakes
 * any __I_NEW waiters, and drops the caller's reference with iput().
 */
void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/* Order the i_state update before waking __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
1091c2b6d621SAl Viro 
10920b2d0724SChristoph Hellwig /**
1093f23ce757SJan Kara  * lock_two_inodes - lock two inodes (may be regular files but also dirs)
1094f23ce757SJan Kara  *
1095f23ce757SJan Kara  * Lock any non-NULL argument. The caller must make sure that if he is passing
1096f23ce757SJan Kara  * in two directories, one is not ancestor of the other.  Zero, one or two
1097f23ce757SJan Kara  * objects may be locked by this function.
1098f23ce757SJan Kara  *
1099f23ce757SJan Kara  * @inode1: first inode to lock
1100f23ce757SJan Kara  * @inode2: second inode to lock
1101f23ce757SJan Kara  * @subclass1: inode lock subclass for the first lock obtained
1102f23ce757SJan Kara  * @subclass2: inode lock subclass for the second lock obtained
1103f23ce757SJan Kara  */
lock_two_inodes(struct inode * inode1,struct inode * inode2,unsigned subclass1,unsigned subclass2)1104f23ce757SJan Kara void lock_two_inodes(struct inode *inode1, struct inode *inode2,
1105f23ce757SJan Kara 		     unsigned subclass1, unsigned subclass2)
1106f23ce757SJan Kara {
1107f23ce757SJan Kara 	if (!inode1 || !inode2) {
1108f23ce757SJan Kara 		/*
1109f23ce757SJan Kara 		 * Make sure @subclass1 will be used for the acquired lock.
1110f23ce757SJan Kara 		 * This is not strictly necessary (no current caller cares) but
1111f23ce757SJan Kara 		 * let's keep things consistent.
1112f23ce757SJan Kara 		 */
1113f23ce757SJan Kara 		if (!inode1)
1114f23ce757SJan Kara 			swap(inode1, inode2);
1115f23ce757SJan Kara 		goto lock;
1116f23ce757SJan Kara 	}
1117f23ce757SJan Kara 
1118f23ce757SJan Kara 	/*
1119f23ce757SJan Kara 	 * If one object is directory and the other is not, we must make sure
1120f23ce757SJan Kara 	 * to lock directory first as the other object may be its child.
1121f23ce757SJan Kara 	 */
1122f23ce757SJan Kara 	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
1123f23ce757SJan Kara 		if (inode1 > inode2)
1124f23ce757SJan Kara 			swap(inode1, inode2);
1125f23ce757SJan Kara 	} else if (!S_ISDIR(inode1->i_mode))
1126f23ce757SJan Kara 		swap(inode1, inode2);
1127f23ce757SJan Kara lock:
1128f23ce757SJan Kara 	if (inode1)
1129f23ce757SJan Kara 		inode_lock_nested(inode1, subclass1);
1130f23ce757SJan Kara 	if (inode2 && inode2 != inode1)
1131f23ce757SJan Kara 		inode_lock_nested(inode2, subclass2);
1132f23ce757SJan Kara }
1133f23ce757SJan Kara 
1134f23ce757SJan Kara /**
1135375e289eSJ. Bruce Fields  * lock_two_nondirectories - take two i_mutexes on non-directory objects
11364fd699aeSJ. Bruce Fields  *
11372454ad83SJan Kara  * Lock any non-NULL argument. Passed objects must not be directories.
11384fd699aeSJ. Bruce Fields  * Zero, one or two objects may be locked by this function.
11394fd699aeSJ. Bruce Fields  *
1140375e289eSJ. Bruce Fields  * @inode1: first inode to lock
1141375e289eSJ. Bruce Fields  * @inode2: second inode to lock
1142375e289eSJ. Bruce Fields  */
lock_two_nondirectories(struct inode * inode1,struct inode * inode2)1143375e289eSJ. Bruce Fields void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
1144375e289eSJ. Bruce Fields {
114533ab231fSChristian Brauner 	if (inode1)
11462454ad83SJan Kara 		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
114733ab231fSChristian Brauner 	if (inode2)
11482454ad83SJan Kara 		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
11492454ad83SJan Kara 	lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
1150375e289eSJ. Bruce Fields }
1151375e289eSJ. Bruce Fields EXPORT_SYMBOL(lock_two_nondirectories);
1152375e289eSJ. Bruce Fields 
1153375e289eSJ. Bruce Fields /**
1154375e289eSJ. Bruce Fields  * unlock_two_nondirectories - release locks from lock_two_nondirectories()
1155375e289eSJ. Bruce Fields  * @inode1: first inode to unlock
1156375e289eSJ. Bruce Fields  * @inode2: second inode to unlock
1157375e289eSJ. Bruce Fields  */
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	/* Mirrors lock_two_nondirectories(): non-directory inodes only. */
	if (inode1) {
		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
		inode_unlock(inode1);
	}
	/* Guard against the same inode having been passed twice. */
	if (inode2 && inode2 != inode1) {
		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
		inode_unlock(inode2);
	}
}
EXPORT_SYMBOL(unlock_two_nondirectories);
1170375e289eSJ. Bruce Fields 
1171375e289eSJ. Bruce Fields /**
117280ea09a0SMiklos Szeredi  * inode_insert5 - obtain an inode from a mounted file system
117380ea09a0SMiklos Szeredi  * @inode:	pre-allocated inode to use for insert to cache
117480ea09a0SMiklos Szeredi  * @hashval:	hash value (usually inode number) to get
117580ea09a0SMiklos Szeredi  * @test:	callback used for comparisons between inodes
117680ea09a0SMiklos Szeredi  * @set:	callback used to initialize a new struct inode
117780ea09a0SMiklos Szeredi  * @data:	opaque data pointer to pass to @test and @set
117880ea09a0SMiklos Szeredi  *
117980ea09a0SMiklos Szeredi  * Search for the inode specified by @hashval and @data in the inode cache,
118080ea09a0SMiklos Szeredi  * and if present it is return it with an increased reference count. This is
118180ea09a0SMiklos Szeredi  * a variant of iget5_locked() for callers that don't want to fail on memory
118280ea09a0SMiklos Szeredi  * allocation of inode.
118380ea09a0SMiklos Szeredi  *
118480ea09a0SMiklos Szeredi  * If the inode is not in cache, insert the pre-allocated inode to cache and
118580ea09a0SMiklos Szeredi  * return it locked, hashed, and with the I_NEW flag set. The file system gets
118680ea09a0SMiklos Szeredi  * to fill it in before unlocking it via unlock_new_inode().
118780ea09a0SMiklos Szeredi  *
118880ea09a0SMiklos Szeredi  * Note both @test and @set are called with the inode_hash_lock held, so can't
118980ea09a0SMiklos Szeredi  * sleep.
119080ea09a0SMiklos Szeredi  */
inode_insert5(struct inode * inode,unsigned long hashval,int (* test)(struct inode *,void *),int (* set)(struct inode *,void *),void * data)119180ea09a0SMiklos Szeredi struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
119280ea09a0SMiklos Szeredi 			    int (*test)(struct inode *, void *),
119380ea09a0SMiklos Szeredi 			    int (*set)(struct inode *, void *), void *data)
119480ea09a0SMiklos Szeredi {
119580ea09a0SMiklos Szeredi 	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
119680ea09a0SMiklos Szeredi 	struct inode *old;
119780ea09a0SMiklos Szeredi 
119880ea09a0SMiklos Szeredi again:
119980ea09a0SMiklos Szeredi 	spin_lock(&inode_hash_lock);
120080ea09a0SMiklos Szeredi 	old = find_inode(inode->i_sb, head, test, data);
120180ea09a0SMiklos Szeredi 	if (unlikely(old)) {
120280ea09a0SMiklos Szeredi 		/*
120380ea09a0SMiklos Szeredi 		 * Uhhuh, somebody else created the same inode under us.
120480ea09a0SMiklos Szeredi 		 * Use the old inode instead of the preallocated one.
120580ea09a0SMiklos Szeredi 		 */
120680ea09a0SMiklos Szeredi 		spin_unlock(&inode_hash_lock);
1207c2b6d621SAl Viro 		if (IS_ERR(old))
1208c2b6d621SAl Viro 			return NULL;
120980ea09a0SMiklos Szeredi 		wait_on_inode(old);
121080ea09a0SMiklos Szeredi 		if (unlikely(inode_unhashed(old))) {
121180ea09a0SMiklos Szeredi 			iput(old);
121280ea09a0SMiklos Szeredi 			goto again;
121380ea09a0SMiklos Szeredi 		}
121480ea09a0SMiklos Szeredi 		return old;
121580ea09a0SMiklos Szeredi 	}
121680ea09a0SMiklos Szeredi 
121780ea09a0SMiklos Szeredi 	if (set && unlikely(set(inode, data))) {
121880ea09a0SMiklos Szeredi 		inode = NULL;
121980ea09a0SMiklos Szeredi 		goto unlock;
122080ea09a0SMiklos Szeredi 	}
122180ea09a0SMiklos Szeredi 
122280ea09a0SMiklos Szeredi 	/*
122380ea09a0SMiklos Szeredi 	 * Return the locked inode with I_NEW set, the
122480ea09a0SMiklos Szeredi 	 * caller is responsible for filling in the contents
122580ea09a0SMiklos Szeredi 	 */
122680ea09a0SMiklos Szeredi 	spin_lock(&inode->i_lock);
122780ea09a0SMiklos Szeredi 	inode->i_state |= I_NEW;
12283f19b2abSDavid Howells 	hlist_add_head_rcu(&inode->i_hash, head);
122980ea09a0SMiklos Szeredi 	spin_unlock(&inode->i_lock);
123018cc912bSJeff Layton 
123118cc912bSJeff Layton 	/*
123218cc912bSJeff Layton 	 * Add inode to the sb list if it's not already. It has I_NEW at this
123318cc912bSJeff Layton 	 * point, so it should be safe to test i_sb_list locklessly.
123418cc912bSJeff Layton 	 */
123518cc912bSJeff Layton 	if (list_empty(&inode->i_sb_list))
1236e950564bSMiklos Szeredi 		inode_sb_list_add(inode);
123780ea09a0SMiklos Szeredi unlock:
123880ea09a0SMiklos Szeredi 	spin_unlock(&inode_hash_lock);
123980ea09a0SMiklos Szeredi 
124080ea09a0SMiklos Szeredi 	return inode;
124180ea09a0SMiklos Szeredi }
124280ea09a0SMiklos Szeredi EXPORT_SYMBOL(inode_insert5);
124380ea09a0SMiklos Szeredi 
124480ea09a0SMiklos Szeredi /**
12450b2d0724SChristoph Hellwig  * iget5_locked - obtain an inode from a mounted file system
12460b2d0724SChristoph Hellwig  * @sb:		super block of file system
12470b2d0724SChristoph Hellwig  * @hashval:	hash value (usually inode number) to get
12480b2d0724SChristoph Hellwig  * @test:	callback used for comparisons between inodes
12490b2d0724SChristoph Hellwig  * @set:	callback used to initialize a new struct inode
12500b2d0724SChristoph Hellwig  * @data:	opaque data pointer to pass to @test and @set
12511da177e4SLinus Torvalds  *
12520b2d0724SChristoph Hellwig  * Search for the inode specified by @hashval and @data in the inode cache,
12530b2d0724SChristoph Hellwig  * and if present it is return it with an increased reference count. This is
12540b2d0724SChristoph Hellwig  * a generalized version of iget_locked() for file systems where the inode
12550b2d0724SChristoph Hellwig  * number is not sufficient for unique identification of an inode.
12560b2d0724SChristoph Hellwig  *
12570b2d0724SChristoph Hellwig  * If the inode is not in cache, allocate a new inode and return it locked,
12580b2d0724SChristoph Hellwig  * hashed, and with the I_NEW flag set. The file system gets to fill it in
12590b2d0724SChristoph Hellwig  * before unlocking it via unlock_new_inode().
12600b2d0724SChristoph Hellwig  *
12610b2d0724SChristoph Hellwig  * Note both @test and @set are called with the inode_hash_lock held, so can't
12620b2d0724SChristoph Hellwig  * sleep.
12631da177e4SLinus Torvalds  */
iget5_locked(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),int (* set)(struct inode *,void *),void * data)12640b2d0724SChristoph Hellwig struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
12656b3304b5SManish Katiyar 		int (*test)(struct inode *, void *),
12660b2d0724SChristoph Hellwig 		int (*set)(struct inode *, void *), void *data)
12671da177e4SLinus Torvalds {
126880ea09a0SMiklos Szeredi 	struct inode *inode = ilookup5(sb, hashval, test, data);
12690b2d0724SChristoph Hellwig 
127080ea09a0SMiklos Szeredi 	if (!inode) {
1271e950564bSMiklos Szeredi 		struct inode *new = alloc_inode(sb);
12720b2d0724SChristoph Hellwig 
127380ea09a0SMiklos Szeredi 		if (new) {
1274e950564bSMiklos Szeredi 			new->i_state = 0;
127580ea09a0SMiklos Szeredi 			inode = inode_insert5(new, hashval, test, set, data);
127680ea09a0SMiklos Szeredi 			if (unlikely(inode != new))
1277e950564bSMiklos Szeredi 				destroy_inode(new);
12782864f301SAl Viro 		}
12791da177e4SLinus Torvalds 	}
12801da177e4SLinus Torvalds 	return inode;
12811da177e4SLinus Torvalds }
12820b2d0724SChristoph Hellwig EXPORT_SYMBOL(iget5_locked);
12831da177e4SLinus Torvalds 
12840b2d0724SChristoph Hellwig /**
12850b2d0724SChristoph Hellwig  * iget_locked - obtain an inode from a mounted file system
12860b2d0724SChristoph Hellwig  * @sb:		super block of file system
12870b2d0724SChristoph Hellwig  * @ino:	inode number to get
12880b2d0724SChristoph Hellwig  *
12890b2d0724SChristoph Hellwig  * Search for the inode specified by @ino in the inode cache and if present
12900b2d0724SChristoph Hellwig  * return it with an increased reference count. This is for file systems
12910b2d0724SChristoph Hellwig  * where the inode number is sufficient for unique identification of an inode.
12920b2d0724SChristoph Hellwig  *
12930b2d0724SChristoph Hellwig  * If the inode is not in cache, allocate a new inode and return it locked,
12940b2d0724SChristoph Hellwig  * hashed, and with the I_NEW flag set.  The file system gets to fill it in
12950b2d0724SChristoph Hellwig  * before unlocking it via unlock_new_inode().
12961da177e4SLinus Torvalds  */
iget_locked(struct super_block * sb,unsigned long ino)12970b2d0724SChristoph Hellwig struct inode *iget_locked(struct super_block *sb, unsigned long ino)
12981da177e4SLinus Torvalds {
12990b2d0724SChristoph Hellwig 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
13001da177e4SLinus Torvalds 	struct inode *inode;
13012864f301SAl Viro again:
13020b2d0724SChristoph Hellwig 	spin_lock(&inode_hash_lock);
13030b2d0724SChristoph Hellwig 	inode = find_inode_fast(sb, head, ino);
13040b2d0724SChristoph Hellwig 	spin_unlock(&inode_hash_lock);
13050b2d0724SChristoph Hellwig 	if (inode) {
1306c2b6d621SAl Viro 		if (IS_ERR(inode))
1307c2b6d621SAl Viro 			return NULL;
13080b2d0724SChristoph Hellwig 		wait_on_inode(inode);
13092864f301SAl Viro 		if (unlikely(inode_unhashed(inode))) {
13102864f301SAl Viro 			iput(inode);
13112864f301SAl Viro 			goto again;
13122864f301SAl Viro 		}
13130b2d0724SChristoph Hellwig 		return inode;
13140b2d0724SChristoph Hellwig 	}
13150b2d0724SChristoph Hellwig 
13161da177e4SLinus Torvalds 	inode = alloc_inode(sb);
13171da177e4SLinus Torvalds 	if (inode) {
13181da177e4SLinus Torvalds 		struct inode *old;
13191da177e4SLinus Torvalds 
132067a23c49SDave Chinner 		spin_lock(&inode_hash_lock);
13211da177e4SLinus Torvalds 		/* We released the lock, so.. */
13221da177e4SLinus Torvalds 		old = find_inode_fast(sb, head, ino);
13231da177e4SLinus Torvalds 		if (!old) {
13241da177e4SLinus Torvalds 			inode->i_ino = ino;
1325250df6edSDave Chinner 			spin_lock(&inode->i_lock);
1326eaff8079SChristoph Hellwig 			inode->i_state = I_NEW;
13273f19b2abSDavid Howells 			hlist_add_head_rcu(&inode->i_hash, head);
1328250df6edSDave Chinner 			spin_unlock(&inode->i_lock);
132955fa6091SDave Chinner 			inode_sb_list_add(inode);
133067a23c49SDave Chinner 			spin_unlock(&inode_hash_lock);
13311da177e4SLinus Torvalds 
13321da177e4SLinus Torvalds 			/* Return the locked inode with I_NEW set, the
13331da177e4SLinus Torvalds 			 * caller is responsible for filling in the contents
13341da177e4SLinus Torvalds 			 */
13351da177e4SLinus Torvalds 			return inode;
13361da177e4SLinus Torvalds 		}
13371da177e4SLinus Torvalds 
13381da177e4SLinus Torvalds 		/*
13391da177e4SLinus Torvalds 		 * Uhhuh, somebody else created the same inode under
13401da177e4SLinus Torvalds 		 * us. Use the old inode instead of the one we just
13411da177e4SLinus Torvalds 		 * allocated.
13421da177e4SLinus Torvalds 		 */
134367a23c49SDave Chinner 		spin_unlock(&inode_hash_lock);
13441da177e4SLinus Torvalds 		destroy_inode(inode);
1345c2b6d621SAl Viro 		if (IS_ERR(old))
1346c2b6d621SAl Viro 			return NULL;
13471da177e4SLinus Torvalds 		inode = old;
13481da177e4SLinus Torvalds 		wait_on_inode(inode);
13492864f301SAl Viro 		if (unlikely(inode_unhashed(inode))) {
13502864f301SAl Viro 			iput(inode);
13512864f301SAl Viro 			goto again;
13522864f301SAl Viro 		}
13531da177e4SLinus Torvalds 	}
13541da177e4SLinus Torvalds 	return inode;
13551da177e4SLinus Torvalds }
13560b2d0724SChristoph Hellwig EXPORT_SYMBOL(iget_locked);
13571da177e4SLinus Torvalds 
1358ad5e195aSChristoph Hellwig /*
1359ad5e195aSChristoph Hellwig  * search the inode cache for a matching inode number.
1360ad5e195aSChristoph Hellwig  * If we find one, then the inode number we are trying to
1361ad5e195aSChristoph Hellwig  * allocate is not unique and so we should not use it.
1362ad5e195aSChristoph Hellwig  *
1363ad5e195aSChristoph Hellwig  * Returns 1 if the inode number is unique, 0 if it is not.
1364ad5e195aSChristoph Hellwig  */
test_inode_iunique(struct super_block * sb,unsigned long ino)1365ad5e195aSChristoph Hellwig static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1366ad5e195aSChristoph Hellwig {
1367ad5e195aSChristoph Hellwig 	struct hlist_head *b = inode_hashtable + hash(sb, ino);
1368ad5e195aSChristoph Hellwig 	struct inode *inode;
1369ad5e195aSChristoph Hellwig 
13703f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, b, i_hash) {
13713f19b2abSDavid Howells 		if (inode->i_ino == ino && inode->i_sb == sb)
1372ad5e195aSChristoph Hellwig 			return 0;
1373ad5e195aSChristoph Hellwig 	}
1374ad5e195aSChristoph Hellwig 	return 1;
1375ad5e195aSChristoph Hellwig }
1376ad5e195aSChristoph Hellwig 
13771da177e4SLinus Torvalds /**
13781da177e4SLinus Torvalds  *	iunique - get a unique inode number
13791da177e4SLinus Torvalds  *	@sb: superblock
13801da177e4SLinus Torvalds  *	@max_reserved: highest reserved inode number
13811da177e4SLinus Torvalds  *
13821da177e4SLinus Torvalds  *	Obtain an inode number that is unique on the system for a given
13831da177e4SLinus Torvalds  *	superblock. This is used by file systems that have no natural
13841da177e4SLinus Torvalds  *	permanent inode numbering system. An inode number is returned that
13851da177e4SLinus Torvalds  *	is higher than the reserved limit but unique.
13861da177e4SLinus Torvalds  *
13871da177e4SLinus Torvalds  *	BUGS:
13881da177e4SLinus Torvalds  *	With a large number of inodes live on the file system this function
13891da177e4SLinus Torvalds  *	currently becomes quite slow.
13901da177e4SLinus Torvalds  */
iunique(struct super_block * sb,ino_t max_reserved)13911da177e4SLinus Torvalds ino_t iunique(struct super_block *sb, ino_t max_reserved)
13921da177e4SLinus Torvalds {
1393866b04fcSJeff Layton 	/*
1394866b04fcSJeff Layton 	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1395866b04fcSJeff Layton 	 * error if st_ino won't fit in target struct field. Use 32bit counter
1396866b04fcSJeff Layton 	 * here to attempt to avoid that.
1397866b04fcSJeff Layton 	 */
1398ad5e195aSChristoph Hellwig 	static DEFINE_SPINLOCK(iunique_lock);
1399866b04fcSJeff Layton 	static unsigned int counter;
14001da177e4SLinus Torvalds 	ino_t res;
14013361c7beSJeffrey Layton 
14023f19b2abSDavid Howells 	rcu_read_lock();
1403ad5e195aSChristoph Hellwig 	spin_lock(&iunique_lock);
14043361c7beSJeffrey Layton 	do {
14053361c7beSJeffrey Layton 		if (counter <= max_reserved)
14063361c7beSJeffrey Layton 			counter = max_reserved + 1;
14071da177e4SLinus Torvalds 		res = counter++;
1408ad5e195aSChristoph Hellwig 	} while (!test_inode_iunique(sb, res));
1409ad5e195aSChristoph Hellwig 	spin_unlock(&iunique_lock);
14103f19b2abSDavid Howells 	rcu_read_unlock();
14113361c7beSJeffrey Layton 
14121da177e4SLinus Torvalds 	return res;
14131da177e4SLinus Torvalds }
14141da177e4SLinus Torvalds EXPORT_SYMBOL(iunique);
14151da177e4SLinus Torvalds 
igrab(struct inode * inode)14161da177e4SLinus Torvalds struct inode *igrab(struct inode *inode)
14171da177e4SLinus Torvalds {
1418250df6edSDave Chinner 	spin_lock(&inode->i_lock);
1419250df6edSDave Chinner 	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
14201da177e4SLinus Torvalds 		__iget(inode);
1421250df6edSDave Chinner 		spin_unlock(&inode->i_lock);
1422250df6edSDave Chinner 	} else {
1423250df6edSDave Chinner 		spin_unlock(&inode->i_lock);
14241da177e4SLinus Torvalds 		/*
14251da177e4SLinus Torvalds 		 * Handle the case where s_op->clear_inode is not been
14261da177e4SLinus Torvalds 		 * called yet, and somebody is calling igrab
14271da177e4SLinus Torvalds 		 * while the inode is getting freed.
14281da177e4SLinus Torvalds 		 */
14291da177e4SLinus Torvalds 		inode = NULL;
1430250df6edSDave Chinner 	}
14311da177e4SLinus Torvalds 	return inode;
14321da177e4SLinus Torvalds }
14331da177e4SLinus Torvalds EXPORT_SYMBOL(igrab);
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds /**
143688bd5121SAnton Altaparmakov  * ilookup5_nowait - search for an inode in the inode cache
14371da177e4SLinus Torvalds  * @sb:		super block of file system to search
14381da177e4SLinus Torvalds  * @hashval:	hash value (usually inode number) to search for
14391da177e4SLinus Torvalds  * @test:	callback used for comparisons between inodes
14401da177e4SLinus Torvalds  * @data:	opaque data pointer to pass to @test
14411da177e4SLinus Torvalds  *
14420b2d0724SChristoph Hellwig  * Search for the inode specified by @hashval and @data in the inode cache.
14431da177e4SLinus Torvalds  * If the inode is in the cache, the inode is returned with an incremented
14440b2d0724SChristoph Hellwig  * reference count.
144588bd5121SAnton Altaparmakov  *
14460b2d0724SChristoph Hellwig  * Note: I_NEW is not waited upon so you have to be very careful what you do
14470b2d0724SChristoph Hellwig  * with the returned inode.  You probably should be using ilookup5() instead.
144888bd5121SAnton Altaparmakov  *
1449b6d0ad68SRandy Dunlap  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
145088bd5121SAnton Altaparmakov  */
ilookup5_nowait(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)145188bd5121SAnton Altaparmakov struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
145288bd5121SAnton Altaparmakov 		int (*test)(struct inode *, void *), void *data)
145388bd5121SAnton Altaparmakov {
145488bd5121SAnton Altaparmakov 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
14550b2d0724SChristoph Hellwig 	struct inode *inode;
145688bd5121SAnton Altaparmakov 
14570b2d0724SChristoph Hellwig 	spin_lock(&inode_hash_lock);
14580b2d0724SChristoph Hellwig 	inode = find_inode(sb, head, test, data);
14590b2d0724SChristoph Hellwig 	spin_unlock(&inode_hash_lock);
14600b2d0724SChristoph Hellwig 
1461c2b6d621SAl Viro 	return IS_ERR(inode) ? NULL : inode;
146288bd5121SAnton Altaparmakov }
146388bd5121SAnton Altaparmakov EXPORT_SYMBOL(ilookup5_nowait);
146488bd5121SAnton Altaparmakov 
146588bd5121SAnton Altaparmakov /**
146688bd5121SAnton Altaparmakov  * ilookup5 - search for an inode in the inode cache
146788bd5121SAnton Altaparmakov  * @sb:		super block of file system to search
146888bd5121SAnton Altaparmakov  * @hashval:	hash value (usually inode number) to search for
146988bd5121SAnton Altaparmakov  * @test:	callback used for comparisons between inodes
147088bd5121SAnton Altaparmakov  * @data:	opaque data pointer to pass to @test
147188bd5121SAnton Altaparmakov  *
14720b2d0724SChristoph Hellwig  * Search for the inode specified by @hashval and @data in the inode cache,
14730b2d0724SChristoph Hellwig  * and if the inode is in the cache, return the inode with an incremented
14740b2d0724SChristoph Hellwig  * reference count.  Waits on I_NEW before returning the inode.
147588bd5121SAnton Altaparmakov  * returned with an incremented reference count.
14761da177e4SLinus Torvalds  *
14770b2d0724SChristoph Hellwig  * This is a generalized version of ilookup() for file systems where the
14780b2d0724SChristoph Hellwig  * inode number is not sufficient for unique identification of an inode.
14791da177e4SLinus Torvalds  *
14800b2d0724SChristoph Hellwig  * Note: @test is called with the inode_hash_lock held, so can't sleep.
14811da177e4SLinus Torvalds  */
ilookup5(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)14821da177e4SLinus Torvalds struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
14831da177e4SLinus Torvalds 		int (*test)(struct inode *, void *), void *data)
14841da177e4SLinus Torvalds {
14852864f301SAl Viro 	struct inode *inode;
14862864f301SAl Viro again:
14872864f301SAl Viro 	inode = ilookup5_nowait(sb, hashval, test, data);
14882864f301SAl Viro 	if (inode) {
14890b2d0724SChristoph Hellwig 		wait_on_inode(inode);
14902864f301SAl Viro 		if (unlikely(inode_unhashed(inode))) {
14912864f301SAl Viro 			iput(inode);
14922864f301SAl Viro 			goto again;
14932864f301SAl Viro 		}
14942864f301SAl Viro 	}
14950b2d0724SChristoph Hellwig 	return inode;
14961da177e4SLinus Torvalds }
14971da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup5);
14981da177e4SLinus Torvalds 
14991da177e4SLinus Torvalds /**
15001da177e4SLinus Torvalds  * ilookup - search for an inode in the inode cache
15011da177e4SLinus Torvalds  * @sb:		super block of file system to search
15021da177e4SLinus Torvalds  * @ino:	inode number to search for
15031da177e4SLinus Torvalds  *
15040b2d0724SChristoph Hellwig  * Search for the inode @ino in the inode cache, and if the inode is in the
15050b2d0724SChristoph Hellwig  * cache, the inode is returned with an incremented reference count.
15061da177e4SLinus Torvalds  */
ilookup(struct super_block * sb,unsigned long ino)15071da177e4SLinus Torvalds struct inode *ilookup(struct super_block *sb, unsigned long ino)
15081da177e4SLinus Torvalds {
15091da177e4SLinus Torvalds 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
15100b2d0724SChristoph Hellwig 	struct inode *inode;
15112864f301SAl Viro again:
15120b2d0724SChristoph Hellwig 	spin_lock(&inode_hash_lock);
15130b2d0724SChristoph Hellwig 	inode = find_inode_fast(sb, head, ino);
15140b2d0724SChristoph Hellwig 	spin_unlock(&inode_hash_lock);
15150b2d0724SChristoph Hellwig 
15162864f301SAl Viro 	if (inode) {
1517c2b6d621SAl Viro 		if (IS_ERR(inode))
1518c2b6d621SAl Viro 			return NULL;
15190b2d0724SChristoph Hellwig 		wait_on_inode(inode);
15202864f301SAl Viro 		if (unlikely(inode_unhashed(inode))) {
15212864f301SAl Viro 			iput(inode);
15222864f301SAl Viro 			goto again;
15232864f301SAl Viro 		}
15242864f301SAl Viro 	}
15250b2d0724SChristoph Hellwig 	return inode;
15261da177e4SLinus Torvalds }
15271da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup);
15281da177e4SLinus Torvalds 
1529fe032c42STheodore Ts'o /**
1530fe032c42STheodore Ts'o  * find_inode_nowait - find an inode in the inode cache
1531fe032c42STheodore Ts'o  * @sb:		super block of file system to search
1532fe032c42STheodore Ts'o  * @hashval:	hash value (usually inode number) to search for
1533fe032c42STheodore Ts'o  * @match:	callback used for comparisons between inodes
1534fe032c42STheodore Ts'o  * @data:	opaque data pointer to pass to @match
1535fe032c42STheodore Ts'o  *
1536fe032c42STheodore Ts'o  * Search for the inode specified by @hashval and @data in the inode
1537fe032c42STheodore Ts'o  * cache, where the helper function @match will return 0 if the inode
1538fe032c42STheodore Ts'o  * does not match, 1 if the inode does match, and -1 if the search
1539fe032c42STheodore Ts'o  * should be stopped.  The @match function must be responsible for
1540fe032c42STheodore Ts'o  * taking the i_lock spin_lock and checking i_state for an inode being
1541fe032c42STheodore Ts'o  * freed or being initialized, and incrementing the reference count
1542fe032c42STheodore Ts'o  * before returning 1.  It also must not sleep, since it is called with
1543fe032c42STheodore Ts'o  * the inode_hash_lock spinlock held.
1544fe032c42STheodore Ts'o  *
1545fe032c42STheodore Ts'o  * This is a even more generalized version of ilookup5() when the
1546fe032c42STheodore Ts'o  * function must never block --- find_inode() can block in
1547fe032c42STheodore Ts'o  * __wait_on_freeing_inode() --- or when the caller can not increment
1548fe032c42STheodore Ts'o  * the reference count because the resulting iput() might cause an
1549fe032c42STheodore Ts'o  * inode eviction.  The tradeoff is that the @match funtion must be
1550fe032c42STheodore Ts'o  * very carefully implemented.
1551fe032c42STheodore Ts'o  */
find_inode_nowait(struct super_block * sb,unsigned long hashval,int (* match)(struct inode *,unsigned long,void *),void * data)1552fe032c42STheodore Ts'o struct inode *find_inode_nowait(struct super_block *sb,
1553fe032c42STheodore Ts'o 				unsigned long hashval,
1554fe032c42STheodore Ts'o 				int (*match)(struct inode *, unsigned long,
1555fe032c42STheodore Ts'o 					     void *),
1556fe032c42STheodore Ts'o 				void *data)
1557fe032c42STheodore Ts'o {
1558fe032c42STheodore Ts'o 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1559fe032c42STheodore Ts'o 	struct inode *inode, *ret_inode = NULL;
1560fe032c42STheodore Ts'o 	int mval;
1561fe032c42STheodore Ts'o 
1562fe032c42STheodore Ts'o 	spin_lock(&inode_hash_lock);
1563fe032c42STheodore Ts'o 	hlist_for_each_entry(inode, head, i_hash) {
1564fe032c42STheodore Ts'o 		if (inode->i_sb != sb)
1565fe032c42STheodore Ts'o 			continue;
1566fe032c42STheodore Ts'o 		mval = match(inode, hashval, data);
1567fe032c42STheodore Ts'o 		if (mval == 0)
1568fe032c42STheodore Ts'o 			continue;
1569fe032c42STheodore Ts'o 		if (mval == 1)
1570fe032c42STheodore Ts'o 			ret_inode = inode;
1571fe032c42STheodore Ts'o 		goto out;
1572fe032c42STheodore Ts'o 	}
1573fe032c42STheodore Ts'o out:
1574fe032c42STheodore Ts'o 	spin_unlock(&inode_hash_lock);
1575fe032c42STheodore Ts'o 	return ret_inode;
1576fe032c42STheodore Ts'o }
1577fe032c42STheodore Ts'o EXPORT_SYMBOL(find_inode_nowait);
1578fe032c42STheodore Ts'o 
15793f19b2abSDavid Howells /**
15803f19b2abSDavid Howells  * find_inode_rcu - find an inode in the inode cache
15813f19b2abSDavid Howells  * @sb:		Super block of file system to search
15823f19b2abSDavid Howells  * @hashval:	Key to hash
15833f19b2abSDavid Howells  * @test:	Function to test match on an inode
15843f19b2abSDavid Howells  * @data:	Data for test function
15853f19b2abSDavid Howells  *
15863f19b2abSDavid Howells  * Search for the inode specified by @hashval and @data in the inode cache,
15873f19b2abSDavid Howells  * where the helper function @test will return 0 if the inode does not match
15883f19b2abSDavid Howells  * and 1 if it does.  The @test function must be responsible for taking the
15893f19b2abSDavid Howells  * i_lock spin_lock and checking i_state for an inode being freed or being
15903f19b2abSDavid Howells  * initialized.
15913f19b2abSDavid Howells  *
15923f19b2abSDavid Howells  * If successful, this will return the inode for which the @test function
15933f19b2abSDavid Howells  * returned 1 and NULL otherwise.
15943f19b2abSDavid Howells  *
15953f19b2abSDavid Howells  * The @test function is not permitted to take a ref on any inode presented.
15963f19b2abSDavid Howells  * It is also not permitted to sleep.
15973f19b2abSDavid Howells  *
15983f19b2abSDavid Howells  * The caller must hold the RCU read lock.
15993f19b2abSDavid Howells  */
find_inode_rcu(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)16003f19b2abSDavid Howells struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
16013f19b2abSDavid Howells 			     int (*test)(struct inode *, void *), void *data)
16023f19b2abSDavid Howells {
16033f19b2abSDavid Howells 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
16043f19b2abSDavid Howells 	struct inode *inode;
16053f19b2abSDavid Howells 
16063f19b2abSDavid Howells 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16073f19b2abSDavid Howells 			 "suspicious find_inode_rcu() usage");
16083f19b2abSDavid Howells 
16093f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, head, i_hash) {
16103f19b2abSDavid Howells 		if (inode->i_sb == sb &&
16113f19b2abSDavid Howells 		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
16123f19b2abSDavid Howells 		    test(inode, data))
16133f19b2abSDavid Howells 			return inode;
16143f19b2abSDavid Howells 	}
16153f19b2abSDavid Howells 	return NULL;
16163f19b2abSDavid Howells }
16173f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_rcu);
16183f19b2abSDavid Howells 
16193f19b2abSDavid Howells /**
1620961f3c89SMauro Carvalho Chehab  * find_inode_by_ino_rcu - Find an inode in the inode cache
16213f19b2abSDavid Howells  * @sb:		Super block of file system to search
16223f19b2abSDavid Howells  * @ino:	The inode number to match
16233f19b2abSDavid Howells  *
16243f19b2abSDavid Howells  * Search for the inode specified by @hashval and @data in the inode cache,
16253f19b2abSDavid Howells  * where the helper function @test will return 0 if the inode does not match
16263f19b2abSDavid Howells  * and 1 if it does.  The @test function must be responsible for taking the
16273f19b2abSDavid Howells  * i_lock spin_lock and checking i_state for an inode being freed or being
16283f19b2abSDavid Howells  * initialized.
16293f19b2abSDavid Howells  *
16303f19b2abSDavid Howells  * If successful, this will return the inode for which the @test function
16313f19b2abSDavid Howells  * returned 1 and NULL otherwise.
16323f19b2abSDavid Howells  *
16333f19b2abSDavid Howells  * The @test function is not permitted to take a ref on any inode presented.
16343f19b2abSDavid Howells  * It is also not permitted to sleep.
16353f19b2abSDavid Howells  *
16363f19b2abSDavid Howells  * The caller must hold the RCU read lock.
16373f19b2abSDavid Howells  */
find_inode_by_ino_rcu(struct super_block * sb,unsigned long ino)16383f19b2abSDavid Howells struct inode *find_inode_by_ino_rcu(struct super_block *sb,
16393f19b2abSDavid Howells 				    unsigned long ino)
16403f19b2abSDavid Howells {
16413f19b2abSDavid Howells 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
16423f19b2abSDavid Howells 	struct inode *inode;
16433f19b2abSDavid Howells 
16443f19b2abSDavid Howells 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16453f19b2abSDavid Howells 			 "suspicious find_inode_by_ino_rcu() usage");
16463f19b2abSDavid Howells 
16473f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, head, i_hash) {
16483f19b2abSDavid Howells 		if (inode->i_ino == ino &&
16493f19b2abSDavid Howells 		    inode->i_sb == sb &&
16503f19b2abSDavid Howells 		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
16513f19b2abSDavid Howells 		    return inode;
16523f19b2abSDavid Howells 	}
16533f19b2abSDavid Howells 	return NULL;
16543f19b2abSDavid Howells }
16553f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_by_ino_rcu);
16563f19b2abSDavid Howells 
insert_inode_locked(struct inode * inode)1657261bca86SAl Viro int insert_inode_locked(struct inode *inode)
1658261bca86SAl Viro {
1659261bca86SAl Viro 	struct super_block *sb = inode->i_sb;
1660261bca86SAl Viro 	ino_t ino = inode->i_ino;
1661261bca86SAl Viro 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
1662261bca86SAl Viro 
1663261bca86SAl Viro 	while (1) {
166472a43d63SAl Viro 		struct inode *old = NULL;
166567a23c49SDave Chinner 		spin_lock(&inode_hash_lock);
1666b67bfe0dSSasha Levin 		hlist_for_each_entry(old, head, i_hash) {
166772a43d63SAl Viro 			if (old->i_ino != ino)
166872a43d63SAl Viro 				continue;
166972a43d63SAl Viro 			if (old->i_sb != sb)
167072a43d63SAl Viro 				continue;
1671250df6edSDave Chinner 			spin_lock(&old->i_lock);
1672250df6edSDave Chinner 			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1673250df6edSDave Chinner 				spin_unlock(&old->i_lock);
167472a43d63SAl Viro 				continue;
1675250df6edSDave Chinner 			}
167672a43d63SAl Viro 			break;
167772a43d63SAl Viro 		}
1678b67bfe0dSSasha Levin 		if (likely(!old)) {
1679250df6edSDave Chinner 			spin_lock(&inode->i_lock);
1680c2b6d621SAl Viro 			inode->i_state |= I_NEW | I_CREATING;
16813f19b2abSDavid Howells 			hlist_add_head_rcu(&inode->i_hash, head);
1682250df6edSDave Chinner 			spin_unlock(&inode->i_lock);
168367a23c49SDave Chinner 			spin_unlock(&inode_hash_lock);
1684261bca86SAl Viro 			return 0;
1685261bca86SAl Viro 		}
1686c2b6d621SAl Viro 		if (unlikely(old->i_state & I_CREATING)) {
1687c2b6d621SAl Viro 			spin_unlock(&old->i_lock);
1688c2b6d621SAl Viro 			spin_unlock(&inode_hash_lock);
1689c2b6d621SAl Viro 			return -EBUSY;
1690c2b6d621SAl Viro 		}
1691261bca86SAl Viro 		__iget(old);
1692250df6edSDave Chinner 		spin_unlock(&old->i_lock);
169367a23c49SDave Chinner 		spin_unlock(&inode_hash_lock);
1694261bca86SAl Viro 		wait_on_inode(old);
16951d3382cbSAl Viro 		if (unlikely(!inode_unhashed(old))) {
1696261bca86SAl Viro 			iput(old);
1697261bca86SAl Viro 			return -EBUSY;
1698261bca86SAl Viro 		}
1699261bca86SAl Viro 		iput(old);
1700261bca86SAl Viro 	}
1701261bca86SAl Viro }
1702261bca86SAl Viro EXPORT_SYMBOL(insert_inode_locked);
1703261bca86SAl Viro 
insert_inode_locked4(struct inode * inode,unsigned long hashval,int (* test)(struct inode *,void *),void * data)1704261bca86SAl Viro int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1705261bca86SAl Viro 		int (*test)(struct inode *, void *), void *data)
1706261bca86SAl Viro {
1707c2b6d621SAl Viro 	struct inode *old;
1708c2b6d621SAl Viro 
1709c2b6d621SAl Viro 	inode->i_state |= I_CREATING;
1710c2b6d621SAl Viro 	old = inode_insert5(inode, hashval, test, NULL, data);
1711261bca86SAl Viro 
171280ea09a0SMiklos Szeredi 	if (old != inode) {
1713261bca86SAl Viro 		iput(old);
1714261bca86SAl Viro 		return -EBUSY;
1715261bca86SAl Viro 	}
171680ea09a0SMiklos Szeredi 	return 0;
1717261bca86SAl Viro }
1718261bca86SAl Viro EXPORT_SYMBOL(insert_inode_locked4);
1719261bca86SAl Viro 
17201da177e4SLinus Torvalds 
/*
 * Unconditionally request eviction of an unreferenced inode; suitable as
 * a ->drop_inode() callback for filesystems that never cache unused
 * inodes (see iput_final(), which treats a non-zero return as "evict").
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
17261da177e4SLinus Torvalds 
172745321ac5SAl Viro /*
172845321ac5SAl Viro  * Called when we're dropping the last reference
172945321ac5SAl Viro  * to an inode.
173045321ac5SAl Viro  *
173145321ac5SAl Viro  * Call the FS "drop_inode()" function, defaulting to
173245321ac5SAl Viro  * the legacy UNIX filesystem behaviour.  If it tells
173345321ac5SAl Viro  * us to evict inode, do so.  Otherwise, retain inode
173445321ac5SAl Viro  * in cache if fs is alive, sync and evict if fs is
173545321ac5SAl Viro  * shutting down.
173645321ac5SAl Viro  */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	unsigned long state;
	int drop;

	/* Called with inode->i_lock held and i_count already zero. */
	WARN_ON(inode->i_state & I_NEW);

	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	/* Keep the inode cached on the LRU while the fs stays usable. */
	if (!drop &&
	    !(inode->i_state & I_DONTCACHE) &&
	    (sb->s_flags & SB_ACTIVE)) {
		__inode_add_lru(inode, true);
		spin_unlock(&inode->i_lock);
		return;
	}

	state = inode->i_state;
	if (!drop) {
		/*
		 * Filesystem is shutting down: sync before evicting.
		 * I_WILL_FREE claims the inode for us while i_lock is
		 * dropped around the (sleeping) writeback call.
		 */
		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
		spin_unlock(&inode->i_lock);

		write_inode_now(inode, 1);

		spin_lock(&inode->i_lock);
		state = inode->i_state;
		WARN_ON(state & I_NEW);
		state &= ~I_WILL_FREE;
	}

	/* Point of no return: mark freeing, unhook from the LRU, evict. */
	WRITE_ONCE(inode->i_state, state | I_FREEING);
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}
17791da177e4SLinus Torvalds 
17801da177e4SLinus Torvalds /**
17811da177e4SLinus Torvalds  *	iput	- put an inode
17821da177e4SLinus Torvalds  *	@inode: inode to put
17831da177e4SLinus Torvalds  *
17841da177e4SLinus Torvalds  *	Puts an inode, dropping its usage count. If the inode use count hits
17851da177e4SLinus Torvalds  *	zero, the inode is then freed and may also be destroyed.
17861da177e4SLinus Torvalds  *
17871da177e4SLinus Torvalds  *	Consequently, iput() can sleep.
17881da177e4SLinus Torvalds  */
void iput(struct inode *inode)
{
	if (!inode)
		return;
	BUG_ON(inode->i_state & I_CLEAR);
retry:
	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
			/*
			 * Last reference to a live inode with pending
			 * lazytime timestamps: take the reference back,
			 * turn them into a real dirty record, and retry
			 * the final drop.
			 */
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);
			trace_writeback_lazytime_iput(inode);
			mark_inode_dirty_sync(inode);
			goto retry;
		}
		/* i_count hit zero with i_lock held: dispose of the inode. */
		iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);
18071da177e4SLinus Torvalds 
180830460e1eSCarlos Maiolino #ifdef CONFIG_BLOCK
18091da177e4SLinus Torvalds /**
18101da177e4SLinus Torvalds  *	bmap	- find a block number in a file
181130460e1eSCarlos Maiolino  *	@inode:  inode owning the block number being requested
181230460e1eSCarlos Maiolino  *	@block: pointer containing the block to find
18131da177e4SLinus Torvalds  *
18142b8e8b55SMauro Carvalho Chehab  *	Replaces the value in ``*block`` with the block number on the device holding
181530460e1eSCarlos Maiolino  *	corresponding to the requested block number in the file.
181630460e1eSCarlos Maiolino  *	That is, asked for block 4 of inode 1 the function will replace the
18172b8e8b55SMauro Carvalho Chehab  *	4 in ``*block``, with disk block relative to the disk start that holds that
181830460e1eSCarlos Maiolino  *	block of the file.
181930460e1eSCarlos Maiolino  *
182030460e1eSCarlos Maiolino  *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
18212b8e8b55SMauro Carvalho Chehab  *	hole, returns 0 and ``*block`` is also set to 0.
18221da177e4SLinus Torvalds  */
int bmap(struct inode *inode, sector_t *block)
{
	struct address_space *mapping = inode->i_mapping;

	/* Not every filesystem implements ->bmap(). */
	if (!mapping->a_ops->bmap)
		return -EINVAL;

	*block = mapping->a_ops->bmap(mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
183230460e1eSCarlos Maiolino #endif
18331da177e4SLinus Torvalds 
183411ff6f05SMatthew Garrett /*
183511ff6f05SMatthew Garrett  * With relative atime, only update atime if the previous atime is
1836d98ffa1aSStephen Kitt  * earlier than or equal to either the ctime or mtime,
1837d98ffa1aSStephen Kitt  * or if at least a day has passed since the last atime update.
183811ff6f05SMatthew Garrett  */
relatime_need_update(struct vfsmount * mnt,struct inode * inode,struct timespec64 now)1839c6718543SMiklos Szeredi static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
18406f22b664SArnd Bergmann 			     struct timespec64 now)
184111ff6f05SMatthew Garrett {
18422276e5baSJeff Layton 	struct timespec64 ctime;
184311ff6f05SMatthew Garrett 
1844c6718543SMiklos Szeredi 	if (!(mnt->mnt_flags & MNT_RELATIME))
184511ff6f05SMatthew Garrett 		return 1;
184611ff6f05SMatthew Garrett 	/*
1847d98ffa1aSStephen Kitt 	 * Is mtime younger than or equal to atime? If yes, update atime:
184811ff6f05SMatthew Garrett 	 */
184995582b00SDeepa Dinamani 	if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
185011ff6f05SMatthew Garrett 		return 1;
185111ff6f05SMatthew Garrett 	/*
1852d98ffa1aSStephen Kitt 	 * Is ctime younger than or equal to atime? If yes, update atime:
185311ff6f05SMatthew Garrett 	 */
18542276e5baSJeff Layton 	ctime = inode_get_ctime(inode);
18552276e5baSJeff Layton 	if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
185611ff6f05SMatthew Garrett 		return 1;
185711ff6f05SMatthew Garrett 
185811ff6f05SMatthew Garrett 	/*
185911ff6f05SMatthew Garrett 	 * Is the previous atime value older than a day? If yes,
186011ff6f05SMatthew Garrett 	 * update atime:
186111ff6f05SMatthew Garrett 	 */
186211ff6f05SMatthew Garrett 	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
186311ff6f05SMatthew Garrett 		return 1;
186411ff6f05SMatthew Garrett 	/*
186511ff6f05SMatthew Garrett 	 * Good, we can skip the atime update:
186611ff6f05SMatthew Garrett 	 */
186711ff6f05SMatthew Garrett 	return 0;
186811ff6f05SMatthew Garrett }
186911ff6f05SMatthew Garrett 
1870541d4c79SJeff Layton /**
1871541d4c79SJeff Layton  * inode_update_timestamps - update the timestamps on the inode
1872541d4c79SJeff Layton  * @inode: inode to be updated
1873541d4c79SJeff Layton  * @flags: S_* flags that needed to be updated
1874541d4c79SJeff Layton  *
1875541d4c79SJeff Layton  * The update_time function is called when an inode's timestamps need to be
1876541d4c79SJeff Layton  * updated for a read or write operation. This function handles updating the
1877541d4c79SJeff Layton  * actual timestamps. It's up to the caller to ensure that the inode is marked
1878541d4c79SJeff Layton  * dirty appropriately.
1879541d4c79SJeff Layton  *
1880541d4c79SJeff Layton  * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
1881541d4c79SJeff Layton  * attempt to update all three of them. S_ATIME updates can be handled
1882541d4c79SJeff Layton  * independently of the rest.
1883541d4c79SJeff Layton  *
1884541d4c79SJeff Layton  * Returns a set of S_* flags indicating which values changed.
1885541d4c79SJeff Layton  */
inode_update_timestamps(struct inode * inode,int flags)1886541d4c79SJeff Layton int inode_update_timestamps(struct inode *inode, int flags)
1887c3b2da31SJosef Bacik {
1888541d4c79SJeff Layton 	int updated = 0;
1889541d4c79SJeff Layton 	struct timespec64 now;
1890c3b2da31SJosef Bacik 
1891541d4c79SJeff Layton 	if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
1892541d4c79SJeff Layton 		struct timespec64 ctime = inode_get_ctime(inode);
18930ae45f63STheodore Ts'o 
1894541d4c79SJeff Layton 		now = inode_set_ctime_current(inode);
1895541d4c79SJeff Layton 		if (!timespec64_equal(&now, &ctime))
1896541d4c79SJeff Layton 			updated |= S_CTIME;
1897541d4c79SJeff Layton 		if (!timespec64_equal(&now, &inode->i_mtime)) {
1898541d4c79SJeff Layton 			inode->i_mtime = now;
1899541d4c79SJeff Layton 			updated |= S_MTIME;
1900541d4c79SJeff Layton 		}
1901541d4c79SJeff Layton 		if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
1902541d4c79SJeff Layton 			updated |= S_VERSION;
1903541d4c79SJeff Layton 	} else {
1904541d4c79SJeff Layton 		now = current_time(inode);
1905e20b14dbSEric Biggers 	}
1906e20b14dbSEric Biggers 
1907541d4c79SJeff Layton 	if (flags & S_ATIME) {
1908541d4c79SJeff Layton 		if (!timespec64_equal(&now, &inode->i_atime)) {
1909541d4c79SJeff Layton 			inode->i_atime = now;
1910541d4c79SJeff Layton 			updated |= S_ATIME;
1911541d4c79SJeff Layton 		}
1912541d4c79SJeff Layton 	}
1913541d4c79SJeff Layton 	return updated;
1914541d4c79SJeff Layton }
1915541d4c79SJeff Layton EXPORT_SYMBOL(inode_update_timestamps);
1916e20b14dbSEric Biggers 
1917541d4c79SJeff Layton /**
1918541d4c79SJeff Layton  * generic_update_time - update the timestamps on the inode
1919541d4c79SJeff Layton  * @inode: inode to be updated
1920541d4c79SJeff Layton  * @flags: S_* flags that needed to be updated
1921541d4c79SJeff Layton  *
1922541d4c79SJeff Layton  * The update_time function is called when an inode's timestamps need to be
1923541d4c79SJeff Layton  * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
1924541d4c79SJeff Layton  * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
1925541d4c79SJeff Layton  * updates can be handled done independently of the rest.
1926541d4c79SJeff Layton  *
1927541d4c79SJeff Layton  * Returns a S_* mask indicating which fields were updated.
1928541d4c79SJeff Layton  */
generic_update_time(struct inode * inode,int flags)1929541d4c79SJeff Layton int generic_update_time(struct inode *inode, int flags)
1930541d4c79SJeff Layton {
1931541d4c79SJeff Layton 	int updated = inode_update_timestamps(inode, flags);
1932541d4c79SJeff Layton 	int dirty_flags = 0;
1933541d4c79SJeff Layton 
1934541d4c79SJeff Layton 	if (updated & (S_ATIME|S_MTIME|S_CTIME))
1935541d4c79SJeff Layton 		dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
1936541d4c79SJeff Layton 	if (updated & S_VERSION)
1937541d4c79SJeff Layton 		dirty_flags |= I_DIRTY_SYNC;
1938e20b14dbSEric Biggers 	__mark_inode_dirty(inode, dirty_flags);
1939541d4c79SJeff Layton 	return updated;
1940c3b2da31SJosef Bacik }
19410ae45f63STheodore Ts'o EXPORT_SYMBOL(generic_update_time);
19420ae45f63STheodore Ts'o 
19430ae45f63STheodore Ts'o /*
19440ae45f63STheodore Ts'o  * This does the actual work of updating an inodes time or version.  Must have
19450ae45f63STheodore Ts'o  * had called mnt_want_write() before calling this.
19460ae45f63STheodore Ts'o  */
inode_update_time(struct inode * inode,int flags)1947913e9928SJeff Layton int inode_update_time(struct inode *inode, int flags)
19480ae45f63STheodore Ts'o {
194923b424d9SDeepa Dinamani 	if (inode->i_op->update_time)
1950913e9928SJeff Layton 		return inode->i_op->update_time(inode, flags);
1951541d4c79SJeff Layton 	generic_update_time(inode, flags);
1952541d4c79SJeff Layton 	return 0;
19530ae45f63STheodore Ts'o }
1954e60feb44SJosef Bacik EXPORT_SYMBOL(inode_update_time);
1955c3b2da31SJosef Bacik 
19561da177e4SLinus Torvalds /**
1957961f3c89SMauro Carvalho Chehab  *	atime_needs_update	-	update the access time
1958185553b2SRandy Dunlap  *	@path: the &struct path to update
195930fdc8eeSRandy Dunlap  *	@inode: inode to update
19601da177e4SLinus Torvalds  *
19611da177e4SLinus Torvalds  *	Update the accessed time on an inode and mark it for writeback.
19621da177e4SLinus Torvalds  *	This function automatically handles read only file systems and media,
19631da177e4SLinus Torvalds  *	as well as the "noatime" flag and inode specific "noatime" markers.
19641da177e4SLinus Torvalds  */
atime_needs_update(const struct path * path,struct inode * inode)1965c6718543SMiklos Szeredi bool atime_needs_update(const struct path *path, struct inode *inode)
19668fa9dd24SNeilBrown {
19678fa9dd24SNeilBrown 	struct vfsmount *mnt = path->mnt;
196895582b00SDeepa Dinamani 	struct timespec64 now;
19698fa9dd24SNeilBrown 
19708fa9dd24SNeilBrown 	if (inode->i_flags & S_NOATIME)
19718fa9dd24SNeilBrown 		return false;
19720bd23d09SEric W. Biederman 
19730bd23d09SEric W. Biederman 	/* Atime updates will likely cause i_uid and i_gid to be written
19740bd23d09SEric W. Biederman 	 * back improprely if their true value is unknown to the vfs.
19750bd23d09SEric W. Biederman 	 */
19764609e1f1SChristian Brauner 	if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
19770bd23d09SEric W. Biederman 		return false;
19780bd23d09SEric W. Biederman 
19798fa9dd24SNeilBrown 	if (IS_NOATIME(inode))
19808fa9dd24SNeilBrown 		return false;
19811751e8a6SLinus Torvalds 	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
19828fa9dd24SNeilBrown 		return false;
19838fa9dd24SNeilBrown 
19848fa9dd24SNeilBrown 	if (mnt->mnt_flags & MNT_NOATIME)
19858fa9dd24SNeilBrown 		return false;
19868fa9dd24SNeilBrown 	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
19878fa9dd24SNeilBrown 		return false;
19888fa9dd24SNeilBrown 
1989c2050a45SDeepa Dinamani 	now = current_time(inode);
19908fa9dd24SNeilBrown 
19916f22b664SArnd Bergmann 	if (!relatime_need_update(mnt, inode, now))
19928fa9dd24SNeilBrown 		return false;
19938fa9dd24SNeilBrown 
199495582b00SDeepa Dinamani 	if (timespec64_equal(&inode->i_atime, &now))
19958fa9dd24SNeilBrown 		return false;
19968fa9dd24SNeilBrown 
19978fa9dd24SNeilBrown 	return true;
19988fa9dd24SNeilBrown }
19998fa9dd24SNeilBrown 
void touch_atime(const struct path *path)
{
	struct vfsmount *mnt = path->mnt;
	struct inode *inode = d_inode(path->dentry);

	if (!atime_needs_update(path, inode))
		return;

	/* Best effort: bail out rather than block on a frozen filesystem. */
	if (!sb_start_write_trylock(inode->i_sb))
		return;

	/* Read-only mount: nothing to write, but sb_start_write must be undone. */
	if (__mnt_want_write(mnt) != 0)
		goto skip_update;
	/*
	 * File systems can error out when updating inodes if they need to
	 * allocate new space to modify an inode (such is the case for
	 * Btrfs), but since we touch atime while walking down the path we
	 * really don't care if we failed to update the atime of the file,
	 * so just ignore the return value.
	 * We may also fail on filesystems that have the ability to make parts
	 * of the fs read only, e.g. subvolumes in Btrfs.
	 */
	inode_update_time(inode, S_ATIME);
	__mnt_drop_write(mnt);
skip_update:
	sb_end_write(inode->i_sb);
}
EXPORT_SYMBOL(touch_atime);
20281da177e4SLinus Torvalds 
20293ed37648SCong Wang /*
2030dbfae0cdSJan Kara  * Return mask of changes for notify_change() that need to be done as a
2031dbfae0cdSJan Kara  * response to write or truncate. Return 0 if nothing has to be changed.
2032dbfae0cdSJan Kara  * Negative value on error (change should be denied).
2033dbfae0cdSJan Kara  */
dentry_needs_remove_privs(struct mnt_idmap * idmap,struct dentry * dentry)20349452e93eSChristian Brauner int dentry_needs_remove_privs(struct mnt_idmap *idmap,
2035ed5a7047SChristian Brauner 			      struct dentry *dentry)
2036dbfae0cdSJan Kara {
2037dbfae0cdSJan Kara 	struct inode *inode = d_inode(dentry);
2038dbfae0cdSJan Kara 	int mask = 0;
2039dbfae0cdSJan Kara 	int ret;
2040dbfae0cdSJan Kara 
2041dbfae0cdSJan Kara 	if (IS_NOSEC(inode))
2042dbfae0cdSJan Kara 		return 0;
2043dbfae0cdSJan Kara 
20449452e93eSChristian Brauner 	mask = setattr_should_drop_suidgid(idmap, inode);
2045dbfae0cdSJan Kara 	ret = security_inode_need_killpriv(dentry);
2046dbfae0cdSJan Kara 	if (ret < 0)
2047dbfae0cdSJan Kara 		return ret;
2048dbfae0cdSJan Kara 	if (ret)
2049dbfae0cdSJan Kara 		mask |= ATTR_KILL_PRIV;
2050dbfae0cdSJan Kara 	return mask;
2051dbfae0cdSJan Kara }
2052dbfae0cdSJan Kara 
__remove_privs(struct mnt_idmap * idmap,struct dentry * dentry,int kill)2053abf08576SChristian Brauner static int __remove_privs(struct mnt_idmap *idmap,
2054643fe55aSChristian Brauner 			  struct dentry *dentry, int kill)
20553ed37648SCong Wang {
20563ed37648SCong Wang 	struct iattr newattrs;
20573ed37648SCong Wang 
20583ed37648SCong Wang 	newattrs.ia_valid = ATTR_FORCE | kill;
205927ac0ffeSJ. Bruce Fields 	/*
206027ac0ffeSJ. Bruce Fields 	 * Note we call this on write, so notify_change will not
206127ac0ffeSJ. Bruce Fields 	 * encounter any conflicting delegations:
206227ac0ffeSJ. Bruce Fields 	 */
2063abf08576SChristian Brauner 	return notify_change(idmap, dentry, &newattrs, NULL);
20643ed37648SCong Wang }
20653ed37648SCong Wang 
__file_remove_privs(struct file * file,unsigned int flags)2066faf99b56SStefan Roesch static int __file_remove_privs(struct file *file, unsigned int flags)
20673ed37648SCong Wang {
2068c1892c37SMiklos Szeredi 	struct dentry *dentry = file_dentry(file);
2069c1892c37SMiklos Szeredi 	struct inode *inode = file_inode(file);
207041191cf6SStefan Roesch 	int error = 0;
2071dbfae0cdSJan Kara 	int kill;
20723ed37648SCong Wang 
2073f69e749aSAlexander Lochmann 	if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
20743ed37648SCong Wang 		return 0;
20753ed37648SCong Wang 
20769452e93eSChristian Brauner 	kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
207741191cf6SStefan Roesch 	if (kill < 0)
2078dbfae0cdSJan Kara 		return kill;
2079faf99b56SStefan Roesch 
208041191cf6SStefan Roesch 	if (kill) {
2081faf99b56SStefan Roesch 		if (flags & IOCB_NOWAIT)
2082faf99b56SStefan Roesch 			return -EAGAIN;
2083faf99b56SStefan Roesch 
2084abf08576SChristian Brauner 		error = __remove_privs(file_mnt_idmap(file), dentry, kill);
208541191cf6SStefan Roesch 	}
208641191cf6SStefan Roesch 
20872426f391SJan Kara 	if (!error)
20882426f391SJan Kara 		inode_has_no_xattr(inode);
20893ed37648SCong Wang 	return error;
20903ed37648SCong Wang }
2091faf99b56SStefan Roesch 
2092faf99b56SStefan Roesch /**
2093faf99b56SStefan Roesch  * file_remove_privs - remove special file privileges (suid, capabilities)
2094faf99b56SStefan Roesch  * @file: file to remove privileges from
2095faf99b56SStefan Roesch  *
2096faf99b56SStefan Roesch  * When file is modified by a write or truncation ensure that special
2097faf99b56SStefan Roesch  * file privileges are removed.
2098faf99b56SStefan Roesch  *
2099faf99b56SStefan Roesch  * Return: 0 on success, negative errno on failure.
2100faf99b56SStefan Roesch  */
int file_remove_privs(struct file *file)
{
	/* flags == 0: blocking variant, no IOCB_NOWAIT semantics. */
	return __file_remove_privs(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);
21063ed37648SCong Wang 
inode_needs_update_time(struct inode * inode)2107913e9928SJeff Layton static int inode_needs_update_time(struct inode *inode)
21081da177e4SLinus Torvalds {
2109c3b2da31SJosef Bacik 	int sync_it = 0;
2110647aa768SChristian Brauner 	struct timespec64 now = current_time(inode);
21112276e5baSJeff Layton 	struct timespec64 ctime;
21121da177e4SLinus Torvalds 
2113ce06e0b2SAndi Kleen 	/* First try to exhaust all avenues to not sync */
21141da177e4SLinus Torvalds 	if (IS_NOCMTIME(inode))
2115c3b2da31SJosef Bacik 		return 0;
211620ddee2cSDave Hansen 
2117913e9928SJeff Layton 	if (!timespec64_equal(&inode->i_mtime, &now))
2118ce06e0b2SAndi Kleen 		sync_it = S_MTIME;
2119ce06e0b2SAndi Kleen 
21202276e5baSJeff Layton 	ctime = inode_get_ctime(inode);
2121913e9928SJeff Layton 	if (!timespec64_equal(&ctime, &now))
2122ce06e0b2SAndi Kleen 		sync_it |= S_CTIME;
2123ce06e0b2SAndi Kleen 
2124e38cf302SJeff Layton 	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
2125ce06e0b2SAndi Kleen 		sync_it |= S_VERSION;
2126ce06e0b2SAndi Kleen 
21276a2aa5d8SStefan Roesch 	return sync_it;
21286a2aa5d8SStefan Roesch }
2129ed97bd37SAndreas Mohr 
/* Apply a timestamp update, but only if the mount permits writes now. */
static int __file_update_time(struct file *file, int sync_mode)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (__mnt_want_write_file(file))
		return 0;

	ret = inode_update_time(inode, sync_mode);
	__mnt_drop_write_file(file);

	return ret;
}
21436a2aa5d8SStefan Roesch 
21446a2aa5d8SStefan Roesch /**
21456a2aa5d8SStefan Roesch  * file_update_time - update mtime and ctime time
21466a2aa5d8SStefan Roesch  * @file: file accessed
21476a2aa5d8SStefan Roesch  *
21486a2aa5d8SStefan Roesch  * Update the mtime and ctime members of an inode and mark the inode for
21496a2aa5d8SStefan Roesch  * writeback. Note that this function is meant exclusively for usage in
21506a2aa5d8SStefan Roesch  * the file write path of filesystems, and filesystems may choose to
21516a2aa5d8SStefan Roesch  * explicitly ignore updates via this function with the _NOCMTIME inode
21526a2aa5d8SStefan Roesch  * flag, e.g. for network filesystem where these imestamps are handled
21536a2aa5d8SStefan Roesch  * by the server. This can return an error for file systems who need to
21546a2aa5d8SStefan Roesch  * allocate space in order to update an inode.
21556a2aa5d8SStefan Roesch  *
21566a2aa5d8SStefan Roesch  * Return: 0 on success, negative errno on failure.
21576a2aa5d8SStefan Roesch  */
int file_update_time(struct file *file)
{
	struct inode *inode = file_inode(file);
	int sync_mode = inode_needs_update_time(inode);

	/* Negative: error; zero: timestamps already current. */
	if (sync_mode <= 0)
		return sync_mode;

	return __file_update_time(file, sync_mode);
}
EXPORT_SYMBOL(file_update_time);
21701da177e4SLinus Torvalds 
2171faf99b56SStefan Roesch /**
217266fa3cedSStefan Roesch  * file_modified_flags - handle mandated vfs changes when modifying a file
217366fa3cedSStefan Roesch  * @file: file that was modified
217466fa3cedSStefan Roesch  * @flags: kiocb flags
217566fa3cedSStefan Roesch  *
217666fa3cedSStefan Roesch  * When file has been modified ensure that special
217766fa3cedSStefan Roesch  * file privileges are removed and time settings are updated.
217866fa3cedSStefan Roesch  *
217966fa3cedSStefan Roesch  * If IOCB_NOWAIT is set, special file privileges will not be removed and
218066fa3cedSStefan Roesch  * time settings will not be updated. It will return -EAGAIN.
218166fa3cedSStefan Roesch  *
218266fa3cedSStefan Roesch  * Context: Caller must hold the file's inode lock.
218366fa3cedSStefan Roesch  *
218466fa3cedSStefan Roesch  * Return: 0 on success, negative errno on failure.
218566fa3cedSStefan Roesch  */
static int file_modified_flags(struct file *file, int flags)
{
	int ret;
	struct inode *inode = file_inode(file);

	/*
	 * Clear the security bits if the process is not being run by root.
	 * This keeps people from modifying setuid and setgid binaries.
	 */
	ret = __file_remove_privs(file, flags);
	if (ret)
		return ret;

	/* The filesystem manages c/mtime itself (e.g. a network fs). */
	if (unlikely(file->f_mode & FMODE_NOCMTIME))
		return 0;

	/* <= 0 means error, or timestamps already current: nothing to do. */
	ret = inode_needs_update_time(inode);
	if (ret <= 0)
		return ret;
	/* Updating the timestamps may block; refuse under IOCB_NOWAIT. */
	if (flags & IOCB_NOWAIT)
		return -EAGAIN;

	return __file_update_time(file, ret);
}
221066fa3cedSStefan Roesch 
221166fa3cedSStefan Roesch /**
2212faf99b56SStefan Roesch  * file_modified - handle mandated vfs changes when modifying a file
2213faf99b56SStefan Roesch  * @file: file that was modified
2214faf99b56SStefan Roesch  *
2215faf99b56SStefan Roesch  * When file has been modified ensure that special
2216faf99b56SStefan Roesch  * file privileges are removed and time settings are updated.
2217faf99b56SStefan Roesch  *
2218faf99b56SStefan Roesch  * Context: Caller must hold the file's inode lock.
2219faf99b56SStefan Roesch  *
2220faf99b56SStefan Roesch  * Return: 0 on success, negative errno on failure.
2221faf99b56SStefan Roesch  */
int file_modified(struct file *file)
{
	/* flags == 0: blocking variant, no IOCB_NOWAIT semantics. */
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
2227e38f7f53SAmir Goldstein 
222866fa3cedSStefan Roesch /**
222966fa3cedSStefan Roesch  * kiocb_modified - handle mandated vfs changes when modifying a file
223066fa3cedSStefan Roesch  * @iocb: iocb that was modified
223166fa3cedSStefan Roesch  *
223266fa3cedSStefan Roesch  * When file has been modified ensure that special
223366fa3cedSStefan Roesch  * file privileges are removed and time settings are updated.
223466fa3cedSStefan Roesch  *
223566fa3cedSStefan Roesch  * Context: Caller must hold the file's inode lock.
223666fa3cedSStefan Roesch  *
223766fa3cedSStefan Roesch  * Return: 0 on success, negative errno on failure.
223866fa3cedSStefan Roesch  */
int kiocb_modified(struct kiocb *iocb)
{
	/* Forward the iocb's flags so IOCB_NOWAIT is honoured. */
	return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
}
EXPORT_SYMBOL_GPL(kiocb_modified);
224466fa3cedSStefan Roesch 
inode_needs_sync(struct inode * inode)22451da177e4SLinus Torvalds int inode_needs_sync(struct inode *inode)
22461da177e4SLinus Torvalds {
22471da177e4SLinus Torvalds 	if (IS_SYNC(inode))
22481da177e4SLinus Torvalds 		return 1;
22491da177e4SLinus Torvalds 	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
22501da177e4SLinus Torvalds 		return 1;
22511da177e4SLinus Torvalds 	return 0;
22521da177e4SLinus Torvalds }
22531da177e4SLinus Torvalds EXPORT_SYMBOL(inode_needs_sync);
22541da177e4SLinus Torvalds 
22551da177e4SLinus Torvalds /*
2256168a9fd6SMiklos Szeredi  * If we try to find an inode in the inode hash while it is being
2257168a9fd6SMiklos Szeredi  * deleted, we have to wait until the filesystem completes its
2258168a9fd6SMiklos Szeredi  * deletion before reporting that it isn't found.  This function waits
2259168a9fd6SMiklos Szeredi  * until the deletion _might_ have completed.  Callers are responsible
2260168a9fd6SMiklos Szeredi  * to recheck inode state.
2261168a9fd6SMiklos Szeredi  *
2262eaff8079SChristoph Hellwig  * It doesn't matter if I_NEW is not set initially, a call to
2263250df6edSDave Chinner  * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
2264250df6edSDave Chinner  * will DTRT.
22651da177e4SLinus Torvalds  */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	/*
	 * Both locks must be dropped before sleeping: the task evicting
	 * the inode needs them to finish teardown and wake us.
	 */
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
	/* Callers expect inode_hash_lock to be held again on return. */
	spin_lock(&inode_hash_lock);
}
22781da177e4SLinus Torvalds 
22791da177e4SLinus Torvalds static __initdata unsigned long ihash_entries;
/*
 * Parse the "ihash_entries=" boot parameter: number of inode hash
 * table entries requested by the administrator.
 */
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	/* Returning 1 marks the option as consumed. */
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);
22881da177e4SLinus Torvalds 
22891da177e4SLinus Torvalds /*
22901da177e4SLinus Torvalds  * Initialize the waitqueues and inode hash table.
22911da177e4SLinus Torvalds  */
void __init inode_init_early(void)
{
	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	/* Memblock-backed allocation (HASH_EARLY); table starts zeroed. */
	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,	/* scale */
					HASH_EARLY | HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,	/* low_limit */
					0);	/* high_limit */
}
23111da177e4SLinus Torvalds 
/*
 * Create the inode slab cache and, if inode_init_early() deferred it
 * (NUMA hashdist), allocate the inode hash table.
 */
void __init inode_init(void)
{
	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 /* SLAB_PANIC: boot cannot proceed without it */
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					 init_once);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,	/* boot-param override, 0 = auto */
					14,		/* scale */
					HASH_ZERO,	/* zero-filled, vmalloc allowed now */
					&i_hash_shift,
					&i_hash_mask,
					0,		/* no low limit */
					0);		/* no high limit */
}
23371da177e4SLinus Torvalds 
/**
 * init_special_inode - set up an inode for a special file
 * @inode: inode to initialize
 * @mode: file mode; the S_IFMT bits select the file type
 * @rdev: device number for character and block devices
 *
 * Install the file operations matching the file type encoded in
 * @mode and, for device nodes, record @rdev. Sockets intentionally
 * keep the default no_open_fops; any other type is reported as bogus.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	switch (mode & S_IFMT) {
	case S_IFCHR:
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFBLK:
		/* Without CONFIG_BLOCK only the device number is recorded. */
		if (IS_ENABLED(CONFIG_BLOCK))
			inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFIFO:
		inode->i_fop = &pipefifo_fops;
		break;
	case S_IFSOCK:
		/* leave it no_open_fops */
		break;
	default:
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
	}
}
EXPORT_SYMBOL(init_special_inode);
2358a1bd120dSDmitry Monakhov 
/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @idmap: idmap of the mount the inode was created from
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 *
 * If the inode has been created through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions
 * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
 * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode)
{
	inode_fsuid_set(inode, idmap);
	if (dir && dir->i_mode & S_ISGID) {
		/* Setgid parent: the new inode inherits the parent's group. */
		inode->i_gid = dir->i_gid;

		/* Directories are special, and always inherit S_ISGID */
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode_fsgid_set(inode, idmap);
	inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
2387e795b717SSerge E. Hallyn 
23882e149670SSerge E. Hallyn /**
23892e149670SSerge E. Hallyn  * inode_owner_or_capable - check current task permissions to inode
239001beba79SChristian Brauner  * @idmap: idmap of the mount the inode was found from
23912e149670SSerge E. Hallyn  * @inode: inode being checked
23922e149670SSerge E. Hallyn  *
239323adbe12SAndy Lutomirski  * Return true if current either has CAP_FOWNER in a namespace with the
239423adbe12SAndy Lutomirski  * inode owner uid mapped, or owns the file.
239521cb47beSChristian Brauner  *
239601beba79SChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
239701beba79SChristian Brauner  * the vfsmount must be passed through @idmap. This function will then take
239801beba79SChristian Brauner  * care to map the inode according to @idmap before checking permissions.
239921cb47beSChristian Brauner  * On non-idmapped mounts or if permission checking is to be performed on the
240001beba79SChristian Brauner  * raw inode simply passs @nop_mnt_idmap.
2401e795b717SSerge E. Hallyn  */
inode_owner_or_capable(struct mnt_idmap * idmap,const struct inode * inode)240201beba79SChristian Brauner bool inode_owner_or_capable(struct mnt_idmap *idmap,
240321cb47beSChristian Brauner 			    const struct inode *inode)
2404e795b717SSerge E. Hallyn {
2405a2bd096fSChristian Brauner 	vfsuid_t vfsuid;
240623adbe12SAndy Lutomirski 	struct user_namespace *ns;
240723adbe12SAndy Lutomirski 
2408e67fe633SChristian Brauner 	vfsuid = i_uid_into_vfsuid(idmap, inode);
2409a2bd096fSChristian Brauner 	if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
2410e795b717SSerge E. Hallyn 		return true;
241123adbe12SAndy Lutomirski 
241223adbe12SAndy Lutomirski 	ns = current_user_ns();
2413a2bd096fSChristian Brauner 	if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
2414e795b717SSerge E. Hallyn 		return true;
2415e795b717SSerge E. Hallyn 	return false;
2416e795b717SSerge E. Hallyn }
24172e149670SSerge E. Hallyn EXPORT_SYMBOL(inode_owner_or_capable);
24181d59d61fSTrond Myklebust 
/*
 * Direct i/o helper functions
 */
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		/*
		 * Queue on the __I_DIO_WAKEUP waitqueue *before* re-reading
		 * i_dio_count, so a wakeup issued between the check and
		 * schedule() cannot be lost.
		 */
		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wq_entry);
}
24341d59d61fSTrond Myklebust 
/**
 * inode_dio_wait - wait for outstanding DIO requests to finish
 * @inode: inode to wait for
 *
 * Waits for all pending direct I/O requests to finish so that we can
 * proceed with a truncate or equivalent operation.
 *
 * Must be called under a lock that serializes taking new references
 * to i_dio_count, usually by inode->i_mutex.
 */
void inode_dio_wait(struct inode *inode)
{
	/* Fast path: skip the waitqueue machinery when no DIO is in flight. */
	if (atomic_read(&inode->i_dio_count))
		__inode_dio_wait(inode);
}
EXPORT_SYMBOL(inode_dio_wait);
24511d59d61fSTrond Myklebust 
/*
 * inode_set_flags - atomically set some inode flags
 * @inode: inode being updated
 * @flags: flag values to set within @mask
 * @mask: bits of i_flags that may be changed
 *
 * Note: the caller should be holding i_mutex, or else be sure that
 * they have exclusive access to the inode structure (i.e., while the
 * inode is being instantiated).  The reason for the cmpxchg() loop
 * --- which wouldn't be necessary if all code paths which modify
 * i_flags actually followed this rule, is that there is at least one
 * code path which doesn't today so we use cmpxchg() out of an abundance
 * of caution.
 *
 * In the long run, i_mutex is overkill, and we should probably look
 * at using the i_lock spinlock to protect i_flags, and then make sure
 * it is so documented in include/linux/fs.h and that all code follows
 * the locking convention!!
 */
void inode_set_flags(struct inode *inode, unsigned int flags,
		     unsigned int mask)
{
	/* Flags outside the caller's mask would be silently dropped. */
	WARN_ON_ONCE(flags & ~mask);
	set_mask_bits(&inode->i_flags, mask, flags);
}
EXPORT_SYMBOL(inode_set_flags);
247521fc61c7SAl Viro 
void inode_nohighmem(struct inode *inode)
{
	/*
	 * Restrict this mapping's page-cache allocations to GFP_USER
	 * (no highmem), so its pages stay directly addressable by the
	 * kernel.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);
24813cd88666SDeepa Dinamani 
/**
 * timestamp_truncate - Truncate timespec to a granularity
 * @t: Timespec
 * @inode: inode being updated
 *
 * Truncate a timespec to the granularity supported by the fs
 * containing the inode. Always rounds down. gran must
 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
 */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	unsigned int gran = sb->s_time_gran;

	/* Clamp seconds into the filesystem's representable range ... */
	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
	/* ... and drop the nanoseconds when saturated at either limit. */
	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
		t.tv_nsec = 0;

	/* Avoid division in the common cases 1 ns and 1 s. */
	if (gran == 1)
		; /* nothing */
	else if (gran == NSEC_PER_SEC)
		t.tv_nsec = 0;
	else if (gran > 1 && gran < NSEC_PER_SEC)
		t.tv_nsec -= t.tv_nsec % gran;	/* round down to multiple of gran */
	else
		WARN(1, "invalid file time granularity: %u", gran);
	return t;
}
EXPORT_SYMBOL(timestamp_truncate);
251250e17c00SDeepa Dinamani 
/**
 * current_time - Return FS time
 * @inode: inode.
 *
 * Return the current time truncated to the time granularity supported by
 * the fs.
 *
 * Note that inode and inode->sb cannot be NULL.
 * Otherwise, the function warns and returns time without truncation.
 */
struct timespec64 current_time(struct inode *inode)
{
	struct timespec64 now;

	/* Coarse (tick-granularity) clock is sufficient for file times. */
	ktime_get_coarse_real_ts64(&now);
	return timestamp_truncate(now, inode);
}
EXPORT_SYMBOL(current_time);
25312b3416ceSYang Xu 
/**
 * inode_set_ctime_current - set the ctime to current_time
 * @inode: inode
 *
 * Set the inode->i_ctime to the current value for the inode. Returns
 * the current value that was assigned to i_ctime.
 */
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
	/* current_time() already truncates to the fs time granularity. */
	struct timespec64 now = current_time(inode);

	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
	return now;
}
EXPORT_SYMBOL(inode_set_ctime_current);
25479b6304c1SJeff Layton 
25489b6304c1SJeff Layton /**
254911c2a870SChristian Brauner  * in_group_or_capable - check whether caller is CAP_FSETID privileged
25509452e93eSChristian Brauner  * @idmap:	idmap of the mount @inode was found from
255111c2a870SChristian Brauner  * @inode:	inode to check
255211c2a870SChristian Brauner  * @vfsgid:	the new/current vfsgid of @inode
255311c2a870SChristian Brauner  *
255411c2a870SChristian Brauner  * Check wether @vfsgid is in the caller's group list or if the caller is
255511c2a870SChristian Brauner  * privileged with CAP_FSETID over @inode. This can be used to determine
255611c2a870SChristian Brauner  * whether the setgid bit can be kept or must be dropped.
255711c2a870SChristian Brauner  *
255811c2a870SChristian Brauner  * Return: true if the caller is sufficiently privileged, false if not.
255911c2a870SChristian Brauner  */
in_group_or_capable(struct mnt_idmap * idmap,const struct inode * inode,vfsgid_t vfsgid)25609452e93eSChristian Brauner bool in_group_or_capable(struct mnt_idmap *idmap,
256111c2a870SChristian Brauner 			 const struct inode *inode, vfsgid_t vfsgid)
256211c2a870SChristian Brauner {
256311c2a870SChristian Brauner 	if (vfsgid_in_group_p(vfsgid))
256411c2a870SChristian Brauner 		return true;
25659452e93eSChristian Brauner 	if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
256611c2a870SChristian Brauner 		return true;
256711c2a870SChristian Brauner 	return false;
256811c2a870SChristian Brauner }
256911c2a870SChristian Brauner 
257011c2a870SChristian Brauner /**
25712b3416ceSYang Xu  * mode_strip_sgid - handle the sgid bit for non-directories
25729452e93eSChristian Brauner  * @idmap: idmap of the mount the inode was created from
25732b3416ceSYang Xu  * @dir: parent directory inode
25742b3416ceSYang Xu  * @mode: mode of the file to be created in @dir
25752b3416ceSYang Xu  *
25762b3416ceSYang Xu  * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
25772b3416ceSYang Xu  * raised and @dir has the S_ISGID bit raised ensure that the caller is
25782b3416ceSYang Xu  * either in the group of the parent directory or they have CAP_FSETID
25792b3416ceSYang Xu  * in their user namespace and are privileged over the parent directory.
25802b3416ceSYang Xu  * In all other cases, strip the S_ISGID bit from @mode.
25812b3416ceSYang Xu  *
25822b3416ceSYang Xu  * Return: the new mode to use for the file
25832b3416ceSYang Xu  */
mode_strip_sgid(struct mnt_idmap * idmap,const struct inode * dir,umode_t mode)25849452e93eSChristian Brauner umode_t mode_strip_sgid(struct mnt_idmap *idmap,
25852b3416ceSYang Xu 			const struct inode *dir, umode_t mode)
25862b3416ceSYang Xu {
25872b3416ceSYang Xu 	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
25882b3416ceSYang Xu 		return mode;
25892b3416ceSYang Xu 	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
25902b3416ceSYang Xu 		return mode;
2591e67fe633SChristian Brauner 	if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
25922b3416ceSYang Xu 		return mode;
25932b3416ceSYang Xu 	return mode & ~S_ISGID;
25942b3416ceSYang Xu }
25952b3416ceSYang Xu EXPORT_SYMBOL(mode_strip_sgid);
2596