xref: /openbmc/linux/fs/inode.c (revision ecc23d0a422a3118fcf6e4f0a46e17a6c2047b02)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * (C) 1997 Linus Torvalds
44b4563dcSChristoph Hellwig  * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
51da177e4SLinus Torvalds  */
6e59cc473SAl Viro #include <linux/export.h>
71da177e4SLinus Torvalds #include <linux/fs.h>
85970e15dSJeff Layton #include <linux/filelock.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/backing-dev.h>
111da177e4SLinus Torvalds #include <linux/hash.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/security.h>
141da177e4SLinus Torvalds #include <linux/cdev.h>
1557c8a661SMike Rapoport #include <linux/memblock.h>
163be25f49SEric Paris #include <linux/fsnotify.h>
17fc33a7bbSChristoph Hellwig #include <linux/mount.h>
18f19d4a8fSAl Viro #include <linux/posix_acl.h>
194b4563dcSChristoph Hellwig #include <linux/buffer_head.h> /* for inode_has_buffers */
207ada4db8SMiklos Szeredi #include <linux/ratelimit.h>
21bc3b14cbSDave Chinner #include <linux/list_lru.h>
22ae5e165dSJeff Layton #include <linux/iversion.h>
230ae45f63STheodore Ts'o #include <trace/events/writeback.h>
24a66979abSDave Chinner #include "internal.h"
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds /*
274b4563dcSChristoph Hellwig  * Inode locking rules:
28250df6edSDave Chinner  *
29250df6edSDave Chinner  * inode->i_lock protects:
3010e14073SJchao Sun  *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
31bc3b14cbSDave Chinner  * Inode LRU list locks protect:
3298b745c6SDave Chinner  *   inode->i_sb->s_inode_lru, inode->i_lru
3374278da9SDave Chinner  * inode->i_sb->s_inode_list_lock protects:
3474278da9SDave Chinner  *   inode->i_sb->s_inodes, inode->i_sb_list
35f758eeabSChristoph Hellwig  * bdi->wb.list_lock protects:
36c7f54084SDave Chinner  *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
3767a23c49SDave Chinner  * inode_hash_lock protects:
3867a23c49SDave Chinner  *   inode_hashtable, inode->i_hash
39250df6edSDave Chinner  *
40250df6edSDave Chinner  * Lock ordering:
4155fa6091SDave Chinner  *
4274278da9SDave Chinner  * inode->i_sb->s_inode_list_lock
4355fa6091SDave Chinner  *   inode->i_lock
44bc3b14cbSDave Chinner  *     Inode LRU list locks
45a66979abSDave Chinner  *
46f758eeabSChristoph Hellwig  * bdi->wb.list_lock
47a66979abSDave Chinner  *   inode->i_lock
4867a23c49SDave Chinner  *
4967a23c49SDave Chinner  * inode_hash_lock
5074278da9SDave Chinner  *   inode->i_sb->s_inode_list_lock
5167a23c49SDave Chinner  *   inode->i_lock
5267a23c49SDave Chinner  *
5367a23c49SDave Chinner  * iunique_lock
5467a23c49SDave Chinner  *   inode_hash_lock
55250df6edSDave Chinner  */
56250df6edSDave Chinner 
57fa3536ccSEric Dumazet static unsigned int i_hash_mask __read_mostly;
58fa3536ccSEric Dumazet static unsigned int i_hash_shift __read_mostly;
5967a23c49SDave Chinner static struct hlist_head *inode_hashtable __read_mostly;
6067a23c49SDave Chinner static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
611da177e4SLinus Torvalds 
/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.  Exported so code
 * elsewhere (e.g. inode_init_always() below) can install it as the
 * default mapping->a_ops.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
697dcda1c9SJens Axboe 
703942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_inodes);
713942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_unused);
72cffbc8aaSDave Chinner 
73e18b890bSChristoph Lameter static struct kmem_cache *inode_cachep __read_mostly;
741da177e4SLinus Torvalds 
get_nr_inodes(void)753942c07cSGlauber Costa static long get_nr_inodes(void)
76cffbc8aaSDave Chinner {
773e880fb5SNick Piggin 	int i;
783942c07cSGlauber Costa 	long sum = 0;
793e880fb5SNick Piggin 	for_each_possible_cpu(i)
803e880fb5SNick Piggin 		sum += per_cpu(nr_inodes, i);
813e880fb5SNick Piggin 	return sum < 0 ? 0 : sum;
82cffbc8aaSDave Chinner }
83cffbc8aaSDave Chinner 
get_nr_inodes_unused(void)843942c07cSGlauber Costa static inline long get_nr_inodes_unused(void)
85cffbc8aaSDave Chinner {
86fcb94f72SDave Chinner 	int i;
873942c07cSGlauber Costa 	long sum = 0;
88fcb94f72SDave Chinner 	for_each_possible_cpu(i)
89fcb94f72SDave Chinner 		sum += per_cpu(nr_unused, i);
90fcb94f72SDave Chinner 	return sum < 0 ? 0 : sum;
91cffbc8aaSDave Chinner }
92cffbc8aaSDave Chinner 
/*
 * Rough estimate of the number of dirty inodes: everything allocated
 * minus everything idle on an LRU.  Not actually a dirty count, just a
 * wild approximation, clamped to be non-negative.
 */
long get_nr_dirty_inodes(void)
{
	long approx_dirty = get_nr_inodes() - get_nr_inodes_unused();

	return approx_dirty > 0 ? approx_dirty : 0;
}
99cffbc8aaSDave Chinner 
100cffbc8aaSDave Chinner /*
101cffbc8aaSDave Chinner  * Handle nr_inode sysctl
102cffbc8aaSDave Chinner  */
103cffbc8aaSDave Chinner #ifdef CONFIG_SYSCTL
1041d67fe58SLuis Chamberlain /*
1051d67fe58SLuis Chamberlain  * Statistics gathering..
1061d67fe58SLuis Chamberlain  */
1071d67fe58SLuis Chamberlain static struct inodes_stat_t inodes_stat;
1081d67fe58SLuis Chamberlain 
/*
 * Sysctl handler for fs/inode-nr and fs/inode-state: refresh the
 * inodes_stat snapshot, then let the generic unsigned-long handler
 * format/copy it out.  The tables are 0444, so @write is never set in
 * practice.
 */
static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_unused = get_nr_inodes_unused();
	inodes_stat.nr_inodes = get_nr_inodes();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
1161d67fe58SLuis Chamberlain 
/* Read-only sysctl tables exposing inode statistics under /proc/sys/fs. */
static struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		/* exposes the first two longs of inodes_stat */
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		/* exposes seven longs — assumes that matches
		 * struct inodes_stat_t's layout; verify if it changes */
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{ }	/* sentinel */
};
1341d67fe58SLuis Chamberlain 
/* Register the inode sysctls under "fs" early in boot. */
static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
141cffbc8aaSDave Chinner #endif
142cffbc8aaSDave Chinner 
/*
 * Default ->open() installed by inode_init_always(): an inode whose
 * filesystem never set real file_operations cannot be opened.
 */
static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}
147bd9b51e7SAl Viro 
/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 *
 * Returns 0 on success, -ENOMEM if security_inode_alloc() fails.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	/* default fops only reject open(); see no_open() above */
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	/* the caller owns the initial reference */
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	/* foreign-writeback detection state starts clean */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	/* set up the embedded i_data mapping */
	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	/* last fallible step: no partial-unwind needed before this point */
	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);
2392cb1599fSDavid Chinner 
/*
 * Return @inode to the common inode slab cache immediately, with no RCU
 * grace period.  Used as the fallback free path by i_callback() below.
 */
void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);
245fdb0da89SAl Viro 
i_callback(struct rcu_head * head)246fdb0da89SAl Viro static void i_callback(struct rcu_head *head)
247fdb0da89SAl Viro {
248fdb0da89SAl Viro 	struct inode *inode = container_of(head, struct inode, i_rcu);
249fdb0da89SAl Viro 	if (inode->free_inode)
250fdb0da89SAl Viro 		inode->free_inode(inode);
251fdb0da89SAl Viro 	else
252fdb0da89SAl Viro 		free_inode_nonrcu(inode);
253fdb0da89SAl Viro }
254fdb0da89SAl Viro 
/*
 * Allocate and initialise an inode for @sb, via the filesystem's
 * ->alloc_inode() when provided, else the common inode slab cache.
 * Returns NULL on allocation or initialisation failure.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		/*
		 * Init failed: undo the allocation.  If the fs has
		 * ->destroy_inode() but no ->free_inode(), destroy_inode
		 * is responsible for freeing and we are done; otherwise
		 * i_callback() performs the ->free_inode()/slab free.
		 */
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}
2811da177e4SLinus Torvalds 
/*
 * Common inode teardown: release writeback, security, fsnotify,
 * file-lock and cached-ACL state and drop the inode from the global
 * accounting.  Called by destroy_inode() and exported for filesystems
 * that free inodes themselves.
 */
void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	/* balance the s_remove_count bump taken when i_nlink went to 0 */
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	/* only real cached ACLs hold a reference; skip sentinels */
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
3032e00c97eSChristoph Hellwig 
/*
 * Fully destroy an inode: common teardown, then either let the
 * filesystem's ->destroy_inode() own the final free, or defer the free
 * through RCU via i_callback().
 */
static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		/* no ->free_inode: ->destroy_inode() freed it, done */
		if (!ops->free_inode)
			return;
	}
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}
3181da177e4SLinus Torvalds 
/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  In cases
 * where we are attempting to track writes to the
 * filesystem, a decrement to zero means an imminent
 * write when the file is truncated and actually unlinked
 * on the filesystem.
 */
void drop_nlink(struct inode *inode)
{
	WARN_ON(inode->i_nlink == 0);	/* underflow would be a fs bug */
	inode->__i_nlink--;
	/* newly link-less inode: account it in sb->s_remove_count */
	if (!inode->i_nlink)
		atomic_long_inc(&inode->i_sb->s_remove_count);
}
EXPORT_SYMBOL(drop_nlink);
3387ada4db8SMiklos Szeredi 
/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  See
 * drop_nlink() for why we care about i_nlink hitting zero.
 */
void clear_nlink(struct inode *inode)
{
	/* only account the 1-or-more -> 0 transition once */
	if (inode->i_nlink) {
		inode->__i_nlink = 0;
		atomic_long_inc(&inode->i_sb->s_remove_count);
	}
}
EXPORT_SYMBOL(clear_nlink);
3557ada4db8SMiklos Szeredi 
3567ada4db8SMiklos Szeredi /**
3577ada4db8SMiklos Szeredi  * set_nlink - directly set an inode's link count
3587ada4db8SMiklos Szeredi  * @inode: inode
3597ada4db8SMiklos Szeredi  * @nlink: new nlink (should be non-zero)
3607ada4db8SMiklos Szeredi  *
3617ada4db8SMiklos Szeredi  * This is a low-level filesystem helper to replace any
3627ada4db8SMiklos Szeredi  * direct filesystem manipulation of i_nlink.
3637ada4db8SMiklos Szeredi  */
set_nlink(struct inode * inode,unsigned int nlink)3647ada4db8SMiklos Szeredi void set_nlink(struct inode *inode, unsigned int nlink)
3657ada4db8SMiklos Szeredi {
3667ada4db8SMiklos Szeredi 	if (!nlink) {
3677ada4db8SMiklos Szeredi 		clear_nlink(inode);
3687ada4db8SMiklos Szeredi 	} else {
3697ada4db8SMiklos Szeredi 		/* Yes, some filesystems do change nlink from zero to one */
3707ada4db8SMiklos Szeredi 		if (inode->i_nlink == 0)
3717ada4db8SMiklos Szeredi 			atomic_long_dec(&inode->i_sb->s_remove_count);
3727ada4db8SMiklos Szeredi 
3737ada4db8SMiklos Szeredi 		inode->__i_nlink = nlink;
3747ada4db8SMiklos Szeredi 	}
3757ada4db8SMiklos Szeredi }
3767ada4db8SMiklos Szeredi EXPORT_SYMBOL(set_nlink);
3777ada4db8SMiklos Szeredi 
/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with dec_nlink().
 */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		/* a 0 -> 1 transition is only expected on I_LINKABLE inodes */
		WARN_ON(!(inode->i_state & I_LINKABLE));
		/* inode regains a link: undo the s_remove_count bump */
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);
3967ada4db8SMiklos Szeredi 
__address_space_init_once(struct address_space * mapping)397ae23395dSDave Chinner static void __address_space_init_once(struct address_space *mapping)
3982aa15890SMiklos Szeredi {
3997b785645SJohannes Weiner 	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
400c8c06efaSDavidlohr Bueso 	init_rwsem(&mapping->i_mmap_rwsem);
4012aa15890SMiklos Szeredi 	INIT_LIST_HEAD(&mapping->private_list);
4022aa15890SMiklos Szeredi 	spin_lock_init(&mapping->private_lock);
403f808c13fSDavidlohr Bueso 	mapping->i_mmap = RB_ROOT_CACHED;
4042aa15890SMiklos Szeredi }
405ae23395dSDave Chinner 
/*
 * Fully initialise a standalone address_space: zero it, then run the
 * common one-time setup.
 */
void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);
4122aa15890SMiklos Szeredi 
4131da177e4SLinus Torvalds /*
4141da177e4SLinus Torvalds  * These are initializations that only need to be done
4151da177e4SLinus Torvalds  * once, because the fields are idempotent across use
4161da177e4SLinus Torvalds  * of the inode, so let the slab aware of that.
4171da177e4SLinus Torvalds  */
inode_init_once(struct inode * inode)4181da177e4SLinus Torvalds void inode_init_once(struct inode *inode)
4191da177e4SLinus Torvalds {
4201da177e4SLinus Torvalds 	memset(inode, 0, sizeof(*inode));
4211da177e4SLinus Torvalds 	INIT_HLIST_NODE(&inode->i_hash);
4221da177e4SLinus Torvalds 	INIT_LIST_HEAD(&inode->i_devices);
423c7f54084SDave Chinner 	INIT_LIST_HEAD(&inode->i_io_list);
4246c60d2b5SDave Chinner 	INIT_LIST_HEAD(&inode->i_wb_list);
4257ccf19a8SNick Piggin 	INIT_LIST_HEAD(&inode->i_lru);
42618cc912bSJeff Layton 	INIT_LIST_HEAD(&inode->i_sb_list);
427ae23395dSDave Chinner 	__address_space_init_once(&inode->i_data);
4281da177e4SLinus Torvalds 	i_size_ordered_init(inode);
4291da177e4SLinus Torvalds }
4301da177e4SLinus Torvalds EXPORT_SYMBOL(inode_init_once);
4311da177e4SLinus Torvalds 
/* kmem_cache constructor: forward the raw object to inode_init_once(). */
static void init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
4381da177e4SLinus Torvalds 
/*
 * Take a reference on @inode.
 *
 * inode->i_lock must be held
 */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}
4461da177e4SLinus Torvalds 
/*
 * get additional reference to inode; caller must already hold one.
 * The WARN_ON fires if the count was zero before the increment,
 * i.e. the caller violated that precondition.
 */
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);
4557de9c6eeSAl Viro 
/*
 * Put an eligible inode on its superblock's LRU.  Eligible means:
 * clean (no dirty/sync/freeing state), unreferenced, on an active
 * superblock, and with a shrinkable mapping.  If the inode is already
 * on the LRU and @rotate is set, mark it I_REFERENCED instead.
 */
static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if ((inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) ||
	    atomic_read(&inode->i_count) ||
	    !(inode->i_sb->s_flags & SB_ACTIVE) ||
	    !mapping_shrinkable(&inode->i_data))
		return;

	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}
4729e38d86fSNick Piggin 
/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
 * Needs inode->i_lock held.
 */
void inode_add_lru(struct inode *inode)
{
	/* rotate=false: an inode already on the LRU is left untouched */
	__inode_add_lru(inode, false);
}
4824eff96ddSJan Kara 
/* Take @inode off its sb's LRU, keeping the nr_unused count in sync. */
static void inode_lru_list_del(struct inode *inode)
{
	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}
4881da177e4SLinus Torvalds 
/*
 * Mark @inode as being isolated from the LRU so that teardown
 * (inode_wait_for_lru_isolating()) waits for the isolation to finish.
 * Caller must hold inode->i_lock (asserted).
 */
static void inode_pin_lru_isolating(struct inode *inode)
{
	lockdep_assert_held(&inode->i_lock);
	WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
	inode->i_state |= I_LRU_ISOLATING;
}
495b9bda5f6SZhihao Cheng 
/*
 * Clear I_LRU_ISOLATING and wake any waiter sleeping on that bit in
 * inode_wait_for_lru_isolating().
 */
static void inode_unpin_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
	inode->i_state &= ~I_LRU_ISOLATING;
	/* order the flag clear before wake_up_bit()'s waiter check */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
	spin_unlock(&inode->i_lock);
}
505b9bda5f6SZhihao Cheng 
/*
 * Block until any in-flight LRU isolation of @inode has finished
 * (I_LRU_ISOLATING cleared by inode_unpin_lru_isolating()).
 */
static void inode_wait_for_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (inode->i_state & I_LRU_ISOLATING) {
		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
		wait_queue_head_t *wqh;

		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
		/* drop i_lock while sleeping; the waker takes it too */
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
		/* the flag must not have been re-set while we slept */
		WARN_ON(inode->i_state & I_LRU_ISOLATING);
	}
	spin_unlock(&inode->i_lock);
}
521b9bda5f6SZhihao Cheng 
/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
void inode_sb_list_add(struct inode *inode)
{
	/* s_inode_list_lock guards sb->s_inodes; see locking rules above */
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);
533646ec461SChristoph Hellwig 
inode_sb_list_del(struct inode * inode)53455fa6091SDave Chinner static inline void inode_sb_list_del(struct inode *inode)
535646ec461SChristoph Hellwig {
536a209dfc7SEric Dumazet 	if (!list_empty(&inode->i_sb_list)) {
53774278da9SDave Chinner 		spin_lock(&inode->i_sb->s_inode_list_lock);
538646ec461SChristoph Hellwig 		list_del_init(&inode->i_sb_list);
53974278da9SDave Chinner 		spin_unlock(&inode->i_sb->s_inode_list_lock);
540646ec461SChristoph Hellwig 	}
541a209dfc7SEric Dumazet }
542646ec461SChristoph Hellwig 
hash(struct super_block * sb,unsigned long hashval)5434c51acbcSDave Chinner static unsigned long hash(struct super_block *sb, unsigned long hashval)
5444c51acbcSDave Chinner {
5454c51acbcSDave Chinner 	unsigned long tmp;
5464c51acbcSDave Chinner 
5474c51acbcSDave Chinner 	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
5484c51acbcSDave Chinner 			L1_CACHE_BYTES;
5494b4563dcSChristoph Hellwig 	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
5504b4563dcSChristoph Hellwig 	return tmp & i_hash_mask;
5514c51acbcSDave Chinner }
5524c51acbcSDave Chinner 
/**
 *	__insert_inode_hash - hash an inode
 *	@inode: unhashed inode
 *	@hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 *	Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	/* lock order per the header comment: inode_hash_lock, then i_lock */
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* _rcu variant: lookups may walk the chain without the hash lock */
	hlist_add_head_rcu(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
5724c51acbcSDave Chinner 
/**
 *	__remove_inode_hash - remove an inode from the hash
 *	@inode: inode to unhash
 *
 *	Remove an inode from the superblock.
 */
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* RCU-safe removal; hash walkers under RCU may still see it briefly */
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);
5884c51acbcSDave Chinner 
/**
 * dump_mapping - best-effort debug dump of an address_space
 * @mapping: mapping to describe; may be a stale or entirely bogus pointer
 *
 * Prints the aops, inode number and (first) dentry name associated with
 * @mapping. Every dereference of @mapping and the objects hanging off it
 * is performed with get_kernel_nofault()/strncpy_from_kernel_nofault(),
 * so this never faults even on garbage input — it simply prints as much
 * as it could safely read.
 */
void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;	/* local copy, filled by nofault read below */
	char fname[64] = {};
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything depending on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	/* NULL host: an anonymous/pseudo mapping — only the aops are useful. */
	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	/* No aliases hashed on the inode: nothing more to name it by. */
	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr)) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
		strscpy(fname, "<invalid>", 63);
	/*
	 * Even if strncpy_from_kernel_nofault() succeeded,
	 * the fname could be unreliable
	 */
	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
		a_ops, ino, fname);
}
6413e9d80a8SMatthew Wilcox (Oracle) 
/**
 * clear_inode - final teardown of an inode's mapping state
 * @inode: inode being evicted; must have I_FREEING set and I_CLEAR unset
 *
 * Verifies that nothing is still attached to the inode's mapping (no
 * pages, no private buffers, no writeback list membership) and then
 * transitions i_state to I_FREEING | I_CLEAR. Called directly by
 * filesystems from ->evict_inode(), or by the default path in evict().
 */
void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __filemap_remove_folio())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * two known and long-standing ways in which nodes may get left behind
	 * (when deep radix-tree node allocation failed partway; or when THP
	 * collapse_file() failed). Until those two known cases are cleaned up,
	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
	 * nor even WARN_ON(!mapping_empty).
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
668b0683aa6SAl Viro 
/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to. We remove any pages still attached to the inode and wait for any IO that
 * is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the lists
 * (e.g. writeback_single_inode). The caller is responsible for setting this.
 *
 * An inode must already be removed from the LRU list before being evicted from
 * the cache. This should occur atomically with setting the I_FREEING state
 * flag, so no inodes here should ever be on the LRU when being evicted.
 */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/*
	 * Wait until any in-flight LRU isolation that pinned this inode
	 * (see inode_pin_lru_isolating() in inode_lru_isolate()) is done.
	 */
	inode_wait_for_lru_isolating(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode.  We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	/* Filesystems may take over teardown; default is truncate + clear. */
	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/*
	 * Wake up anyone sleeping on the __I_NEW bit (e.g. lookups parked
	 * in __wait_on_freeing_inode()) before the inode memory goes away.
	 */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
722b4272d4cSAl Viro 
7231da177e4SLinus Torvalds /*
7241da177e4SLinus Torvalds  * dispose_list - dispose of the contents of a local list
7251da177e4SLinus Torvalds  * @head: the head of the list to free
7261da177e4SLinus Torvalds  *
7271da177e4SLinus Torvalds  * Dispose-list gets a local list with local inodes in it, so it doesn't
7281da177e4SLinus Torvalds  * need to worry about list corruption and SMP locks.
7291da177e4SLinus Torvalds  */
dispose_list(struct list_head * head)7301da177e4SLinus Torvalds static void dispose_list(struct list_head *head)
7311da177e4SLinus Torvalds {
7321da177e4SLinus Torvalds 	while (!list_empty(head)) {
7331da177e4SLinus Torvalds 		struct inode *inode;
7341da177e4SLinus Torvalds 
7357ccf19a8SNick Piggin 		inode = list_first_entry(head, struct inode, i_lru);
7367ccf19a8SNick Piggin 		list_del_init(&inode->i_lru);
7371da177e4SLinus Torvalds 
738644da596SAl Viro 		evict(inode);
739ac05fbb4SJosef Bacik 		cond_resched();
7401da177e4SLinus Torvalds 	}
7411da177e4SLinus Torvalds }
7421da177e4SLinus Torvalds 
/**
 * evict_inodes	- evict all evictable inodes for a superblock
 * @sb:		superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having SB_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		/* Cheap unlocked check to skip obviously-busy inodes fast. */
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		/* Re-check under i_lock: the refcount may have been raised. */
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);
79563997e98SAl Viro 
/**
 * invalidate_inodes	- attempt to free all inodes on a superblock
 * @sb:		superblock to operate on
 *
 * Attempts to free all inodes (including dirty inodes) for a given superblock.
 * Inodes that are still referenced, new, or already on their way to being
 * freed are silently skipped.
 */
void invalidate_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		/*
		 * Periodically drop s_inode_list_lock and dispose what we
		 * have so far, so long lists don't hog the CPU.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
8351da177e4SLinus Torvalds 
/*
 * Isolate the inode from the LRU in preparation for freeing it.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	/* arg is the "freeable" dispose list passed by prune_icache_sb(). */
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * On highmem systems, mapping_shrinkable() permits dropping
	 * page cache in order to free up struct inodes: lowmem might
	 * be under pressure before the cache inside the highmem zone.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		/*
		 * Pin the inode against eviction while we drop both locks:
		 * evict() waits for this pin via inode_wait_for_lru_isolating().
		 */
		inode_pin_lru_isolating(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			mm_account_reclaimed_pages(reap);
		}
		inode_unpin_lru_isolating(inode);
		spin_lock(lru_lock);
		/* Re-evaluate this inode now that the locks are held again. */
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}
913bc3b14cbSDave Chinner 
914bc3b14cbSDave Chinner /*
915bc3b14cbSDave Chinner  * Walk the superblock inode LRU for freeable inodes and attempt to free them.
916bc3b14cbSDave Chinner  * This is called from the superblock shrinker function with a number of inodes
917bc3b14cbSDave Chinner  * to trim from the LRU. Inodes to be freed are moved to a temporary list and
918bc3b14cbSDave Chinner  * then are freed outside inode_lock by dispose_list().
919bc3b14cbSDave Chinner  */
prune_icache_sb(struct super_block * sb,struct shrink_control * sc)920503c358cSVladimir Davydov long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
921bc3b14cbSDave Chinner {
922bc3b14cbSDave Chinner 	LIST_HEAD(freeable);
923bc3b14cbSDave Chinner 	long freed;
924bc3b14cbSDave Chinner 
925503c358cSVladimir Davydov 	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
926503c358cSVladimir Davydov 				     inode_lru_isolate, &freeable);
9271da177e4SLinus Torvalds 	dispose_list(&freeable);
9280a234c6dSDave Chinner 	return freed;
9291da177e4SLinus Torvalds }
9301da177e4SLinus Torvalds 
9311da177e4SLinus Torvalds static void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode lock held.
 * NOTE(review): "inode lock" here appears to mean inode_hash_lock (this
 * walks a hash chain) — confirm against callers.
 *
 * Walk one hash chain looking for an inode on @sb matching @test(@data).
 * Returns the inode with an elevated refcount, NULL if not found, or
 * ERR_PTR(-ESTALE) if the matching inode is still being created.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is being torn down: wait, then rescan chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			/* Creator hasn't finished setting this inode up. */
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		/* Take a reference while still holding i_lock. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9631da177e4SLinus Torvalds 
/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 *
 * Matches by inode number instead of a callback. Same return contract as
 * find_inode(): referenced inode, NULL, or ERR_PTR(-ESTALE).
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		/* Cheapest test first: inode number, then superblock. */
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is being torn down: wait, then rescan chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			/* Creator hasn't finished setting this inode up. */
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		/* Take a reference while still holding i_lock. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9931da177e4SLinus Torvalds }
9941da177e4SLinus Torvalds 
995f991bd2eSEric Dumazet /*
996f991bd2eSEric Dumazet  * Each cpu owns a range of LAST_INO_BATCH numbers.
997f991bd2eSEric Dumazet  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
998f991bd2eSEric Dumazet  * to renew the exhausted range.
9998290c35fSDavid Chinner  *
1000f991bd2eSEric Dumazet  * This does not significantly increase overflow rate because every CPU can
1001f991bd2eSEric Dumazet  * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
1002f991bd2eSEric Dumazet  * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
1003f991bd2eSEric Dumazet  * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
1004f991bd2eSEric Dumazet  * overflow rate by 2x, which does not seem too significant.
1005f991bd2eSEric Dumazet  *
1006f991bd2eSEric Dumazet  * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1007f991bd2eSEric Dumazet  * error if st_ino won't fit in target struct field. Use 32bit counter
1008f991bd2eSEric Dumazet  * here to attempt to avoid that.
10098290c35fSDavid Chinner  */
1010f991bd2eSEric Dumazet #define LAST_INO_BATCH 1024
1011f991bd2eSEric Dumazet static DEFINE_PER_CPU(unsigned int, last_ino);
10128290c35fSDavid Chinner 
get_next_ino(void)101385fe4025SChristoph Hellwig unsigned int get_next_ino(void)
1014f991bd2eSEric Dumazet {
1015f991bd2eSEric Dumazet 	unsigned int *p = &get_cpu_var(last_ino);
1016f991bd2eSEric Dumazet 	unsigned int res = *p;
1017f991bd2eSEric Dumazet 
1018f991bd2eSEric Dumazet #ifdef CONFIG_SMP
1019f991bd2eSEric Dumazet 	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
1020f991bd2eSEric Dumazet 		static atomic_t shared_last_ino;
1021f991bd2eSEric Dumazet 		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
1022f991bd2eSEric Dumazet 
1023f991bd2eSEric Dumazet 		res = next - LAST_INO_BATCH;
10248290c35fSDavid Chinner 	}
1025f991bd2eSEric Dumazet #endif
1026f991bd2eSEric Dumazet 
10272adc376cSCarlos Maiolino 	res++;
10282adc376cSCarlos Maiolino 	/* get_next_ino should not provide a 0 inode number */
10292adc376cSCarlos Maiolino 	if (unlikely(!res))
10302adc376cSCarlos Maiolino 		res++;
10312adc376cSCarlos Maiolino 	*p = res;
1032f991bd2eSEric Dumazet 	put_cpu_var(last_ino);
1033f991bd2eSEric Dumazet 	return res;
1034f991bd2eSEric Dumazet }
103585fe4025SChristoph Hellwig EXPORT_SYMBOL(get_next_ino);
10368290c35fSDavid Chinner 
10371da177e4SLinus Torvalds /**
1038a209dfc7SEric Dumazet  *	new_inode_pseudo 	- obtain an inode
1039a209dfc7SEric Dumazet  *	@sb: superblock
1040a209dfc7SEric Dumazet  *
1041a209dfc7SEric Dumazet  *	Allocates a new inode for given superblock.
1042a209dfc7SEric Dumazet  *	Inode wont be chained in superblock s_inodes list
1043a209dfc7SEric Dumazet  *	This means :
1044a209dfc7SEric Dumazet  *	- fs can't be unmount
1045a209dfc7SEric Dumazet  *	- quotas, fsnotify, writeback can't work
1046a209dfc7SEric Dumazet  */
new_inode_pseudo(struct super_block * sb)1047a209dfc7SEric Dumazet struct inode *new_inode_pseudo(struct super_block *sb)
1048a209dfc7SEric Dumazet {
1049a209dfc7SEric Dumazet 	struct inode *inode = alloc_inode(sb);
1050a209dfc7SEric Dumazet 
1051a209dfc7SEric Dumazet 	if (inode) {
1052a209dfc7SEric Dumazet 		spin_lock(&inode->i_lock);
1053a209dfc7SEric Dumazet 		inode->i_state = 0;
1054a209dfc7SEric Dumazet 		spin_unlock(&inode->i_lock);
1055a209dfc7SEric Dumazet 	}
1056a209dfc7SEric Dumazet 	return inode;
1057a209dfc7SEric Dumazet }
1058a209dfc7SEric Dumazet 
/**
 *	new_inode 	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock. The default gfp_mask
 *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 *	If HIGHMEM pages are unsuitable or it is known that pages allocated
 *	for the page cache are not reclaimable or migratable,
 *	mapping_set_gfp_mask() must be called with suitable flags on the
 *	newly created inode's mapping
 *
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode = new_inode_pseudo(sb);

	/* Unlike new_inode_pseudo(), chain the inode into sb->s_inodes. */
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);
10811da177e4SLinus Torvalds 
108214358e6dSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_annotate_inode_mutex_key(struct inode * inode)1083e096d0c7SJosh Boyer void lockdep_annotate_inode_mutex_key(struct inode *inode)
1084e096d0c7SJosh Boyer {
1085a3314a0eSNamhyung Kim 	if (S_ISDIR(inode->i_mode)) {
108614358e6dSPeter Zijlstra 		struct file_system_type *type = inode->i_sb->s_type;
10871e89a5e1SPeter Zijlstra 
10889a7aa12fSJan Kara 		/* Set new key only if filesystem hasn't already changed it */
10899902af79SAl Viro 		if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
109014358e6dSPeter Zijlstra 			/*
109114358e6dSPeter Zijlstra 			 * ensure nobody is actually holding i_mutex
109214358e6dSPeter Zijlstra 			 */
10939902af79SAl Viro 			// mutex_destroy(&inode->i_mutex);
10949902af79SAl Viro 			init_rwsem(&inode->i_rwsem);
10959902af79SAl Viro 			lockdep_set_class(&inode->i_rwsem,
10969a7aa12fSJan Kara 					  &type->i_mutex_dir_key);
10979a7aa12fSJan Kara 		}
10981e89a5e1SPeter Zijlstra 	}
1099e096d0c7SJosh Boyer }
1100e096d0c7SJosh Boyer EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
110114358e6dSPeter Zijlstra #endif
1102e096d0c7SJosh Boyer 
/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:	new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 * Clears both I_NEW and I_CREATING; compare discard_new_inode(), which
 * leaves I_CREATING set.
 */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/* Order the i_state update before the wakeup on the __I_NEW bit. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
11211da177e4SLinus Torvalds 
/**
 * discard_new_inode - abandon a partially set up new inode
 * @inode: inode whose setup failed
 *
 * Like unlock_new_inode(), but also drops the caller's reference via
 * iput(). Note that only I_NEW is cleared here — I_CREATING stays set,
 * so concurrent lookups (find_inode()/find_inode_fast()) continue to
 * get ERR_PTR(-ESTALE) for this inode.
 */
void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/* Order the i_state update before the wakeup on the __I_NEW bit. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
1134c2b6d621SAl Viro 
/**
 * lock_two_inodes - lock two inodes (may be regular files but also dirs)
 *
 * Lock any non-NULL argument. The caller must make sure that if he is passing
 * in two directories, one is not ancestor of the other.  Zero, one or two
 * objects may be locked by this function.
 *
 * @inode1: first inode to lock
 * @inode2: second inode to lock
 * @subclass1: inode lock subclass for the first lock obtained
 * @subclass2: inode lock subclass for the second lock obtained
 */
void lock_two_inodes(struct inode *inode1, struct inode *inode2,
		     unsigned subclass1, unsigned subclass2)
{
	if (!inode1 || !inode2) {
		/*
		 * Make sure @subclass1 will be used for the acquired lock.
		 * This is not strictly necessary (no current caller cares) but
		 * let's keep things consistent.
		 */
		if (!inode1)
			swap(inode1, inode2);
		goto lock;
	}

	/*
	 * If one object is directory and the other is not, we must make sure
	 * to lock directory first as the other object may be its child.
	 */
	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
		/*
		 * Same kind: lock in ascending address order so concurrent
		 * callers always take the two locks in the same order.
		 */
		if (inode1 > inode2)
			swap(inode1, inode2);
	} else if (!S_ISDIR(inode1->i_mode))
		swap(inode1, inode2);
lock:
	if (inode1)
		inode_lock_nested(inode1, subclass1);
	/* If both arguments were the same inode, lock it only once. */
	if (inode2 && inode2 != inode1)
		inode_lock_nested(inode2, subclass2);
}
1176f23ce757SJan Kara 
/**
 * lock_two_nondirectories - take two i_mutexes on non-directory objects
 *
 * Lock any non-NULL argument. Passed objects must not be directories.
 * Zero, one or two objects may be locked by this function.
 *
 * @inode1: first inode to lock
 * @inode2: second inode to lock
 */
void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	/* Directories are not allowed here; warn but proceed. */
	if (inode1)
		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
	if (inode2)
		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
	/* lock_two_inodes() provides the consistent lock ordering. */
	lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
1195375e289eSJ. Bruce Fields 
1196375e289eSJ. Bruce Fields /**
1197375e289eSJ. Bruce Fields  * unlock_two_nondirectories - release locks from lock_two_nondirectories()
1198375e289eSJ. Bruce Fields  * @inode1: first inode to unlock
1199375e289eSJ. Bruce Fields  * @inode2: second inode to unlock
1200375e289eSJ. Bruce Fields  */
unlock_two_nondirectories(struct inode * inode1,struct inode * inode2)1201375e289eSJ. Bruce Fields void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
1202375e289eSJ. Bruce Fields {
12032454ad83SJan Kara 	if (inode1) {
12042454ad83SJan Kara 		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
12055955102cSAl Viro 		inode_unlock(inode1);
12062454ad83SJan Kara 	}
12072454ad83SJan Kara 	if (inode2 && inode2 != inode1) {
12082454ad83SJan Kara 		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
12095955102cSAl Viro 		inode_unlock(inode2);
1210375e289eSJ. Bruce Fields 	}
12112454ad83SJan Kara }
1212375e289eSJ. Bruce Fields EXPORT_SYMBOL(unlock_two_nondirectories);
1213375e289eSJ. Bruce Fields 
1214375e289eSJ. Bruce Fields /**
121580ea09a0SMiklos Szeredi  * inode_insert5 - obtain an inode from a mounted file system
121680ea09a0SMiklos Szeredi  * @inode:	pre-allocated inode to use for insert to cache
121780ea09a0SMiklos Szeredi  * @hashval:	hash value (usually inode number) to get
121880ea09a0SMiklos Szeredi  * @test:	callback used for comparisons between inodes
121980ea09a0SMiklos Szeredi  * @set:	callback used to initialize a new struct inode
122080ea09a0SMiklos Szeredi  * @data:	opaque data pointer to pass to @test and @set
122180ea09a0SMiklos Szeredi  *
122280ea09a0SMiklos Szeredi  * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present it is returned with an increased reference count. This is
122480ea09a0SMiklos Szeredi  * a variant of iget5_locked() for callers that don't want to fail on memory
122580ea09a0SMiklos Szeredi  * allocation of inode.
122680ea09a0SMiklos Szeredi  *
122780ea09a0SMiklos Szeredi  * If the inode is not in cache, insert the pre-allocated inode to cache and
122880ea09a0SMiklos Szeredi  * return it locked, hashed, and with the I_NEW flag set. The file system gets
122980ea09a0SMiklos Szeredi  * to fill it in before unlocking it via unlock_new_inode().
123080ea09a0SMiklos Szeredi  *
123180ea09a0SMiklos Szeredi  * Note both @test and @set are called with the inode_hash_lock held, so can't
123280ea09a0SMiklos Szeredi  * sleep.
123380ea09a0SMiklos Szeredi  */
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
			    int (*test)(struct inode *, void *),
			    int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	struct inode *old;

again:
	spin_lock(&inode_hash_lock);
	old = find_inode(inode->i_sb, head, test, data);
	if (unlikely(old)) {
		/*
		 * Uhhuh, somebody else created the same inode under us.
		 * Use the old inode instead of the preallocated one.
		 */
		spin_unlock(&inode_hash_lock);
		/* find_inode() returns an ERR_PTR for an I_CREATING inode. */
		if (IS_ERR(old))
			return NULL;
		wait_on_inode(old);
		if (unlikely(inode_unhashed(old))) {
			/* Raced with eviction while waiting: look up again. */
			iput(old);
			goto again;
		}
		return old;
	}

	/* ->set() failure: return NULL, the caller disposes of @inode. */
	if (set && unlikely(set(inode, data))) {
		inode = NULL;
		goto unlock;
	}

	/*
	 * Return the locked inode with I_NEW set, the
	 * caller is responsible for filling in the contents
	 */
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head_rcu(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);

	/*
	 * Add inode to the sb list if it's not already. It has I_NEW at this
	 * point, so it should be safe to test i_sb_list locklessly.
	 */
	if (list_empty(&inode->i_sb_list))
		inode_sb_list_add(inode);
unlock:
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(inode_insert5);
128680ea09a0SMiklos Szeredi 
128780ea09a0SMiklos Szeredi /**
12880b2d0724SChristoph Hellwig  * iget5_locked - obtain an inode from a mounted file system
12890b2d0724SChristoph Hellwig  * @sb:		super block of file system
12900b2d0724SChristoph Hellwig  * @hashval:	hash value (usually inode number) to get
12910b2d0724SChristoph Hellwig  * @test:	callback used for comparisons between inodes
12920b2d0724SChristoph Hellwig  * @set:	callback used to initialize a new struct inode
12930b2d0724SChristoph Hellwig  * @data:	opaque data pointer to pass to @test and @set
12941da177e4SLinus Torvalds  *
12950b2d0724SChristoph Hellwig  * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present it is returned with an increased reference count. This is
12970b2d0724SChristoph Hellwig  * a generalized version of iget_locked() for file systems where the inode
12980b2d0724SChristoph Hellwig  * number is not sufficient for unique identification of an inode.
12990b2d0724SChristoph Hellwig  *
13000b2d0724SChristoph Hellwig  * If the inode is not in cache, allocate a new inode and return it locked,
13010b2d0724SChristoph Hellwig  * hashed, and with the I_NEW flag set. The file system gets to fill it in
13020b2d0724SChristoph Hellwig  * before unlocking it via unlock_new_inode().
13030b2d0724SChristoph Hellwig  *
13040b2d0724SChristoph Hellwig  * Note both @test and @set are called with the inode_hash_lock held, so can't
13050b2d0724SChristoph Hellwig  * sleep.
13061da177e4SLinus Torvalds  */
iget5_locked(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),int (* set)(struct inode *,void *),void * data)13070b2d0724SChristoph Hellwig struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
13086b3304b5SManish Katiyar 		int (*test)(struct inode *, void *),
13090b2d0724SChristoph Hellwig 		int (*set)(struct inode *, void *), void *data)
13101da177e4SLinus Torvalds {
131180ea09a0SMiklos Szeredi 	struct inode *inode = ilookup5(sb, hashval, test, data);
13120b2d0724SChristoph Hellwig 
131380ea09a0SMiklos Szeredi 	if (!inode) {
1314e950564bSMiklos Szeredi 		struct inode *new = alloc_inode(sb);
13150b2d0724SChristoph Hellwig 
131680ea09a0SMiklos Szeredi 		if (new) {
1317e950564bSMiklos Szeredi 			new->i_state = 0;
131880ea09a0SMiklos Szeredi 			inode = inode_insert5(new, hashval, test, set, data);
131980ea09a0SMiklos Szeredi 			if (unlikely(inode != new))
1320e950564bSMiklos Szeredi 				destroy_inode(new);
13212864f301SAl Viro 		}
13221da177e4SLinus Torvalds 	}
13231da177e4SLinus Torvalds 	return inode;
13241da177e4SLinus Torvalds }
13250b2d0724SChristoph Hellwig EXPORT_SYMBOL(iget5_locked);
13261da177e4SLinus Torvalds 
13270b2d0724SChristoph Hellwig /**
13280b2d0724SChristoph Hellwig  * iget_locked - obtain an inode from a mounted file system
13290b2d0724SChristoph Hellwig  * @sb:		super block of file system
13300b2d0724SChristoph Hellwig  * @ino:	inode number to get
13310b2d0724SChristoph Hellwig  *
13320b2d0724SChristoph Hellwig  * Search for the inode specified by @ino in the inode cache and if present
13330b2d0724SChristoph Hellwig  * return it with an increased reference count. This is for file systems
13340b2d0724SChristoph Hellwig  * where the inode number is sufficient for unique identification of an inode.
13350b2d0724SChristoph Hellwig  *
13360b2d0724SChristoph Hellwig  * If the inode is not in cache, allocate a new inode and return it locked,
13370b2d0724SChristoph Hellwig  * hashed, and with the I_NEW flag set.  The file system gets to fill it in
13380b2d0724SChristoph Hellwig  * before unlocking it via unlock_new_inode().
13391da177e4SLinus Torvalds  */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;
again:
	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);
	if (inode) {
		/* find_inode_fast() returns ERR_PTR for I_CREATING inodes. */
		if (IS_ERR(inode))
			return NULL;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			/* Evicted while we slept in wait_on_inode(): retry. */
			iput(inode);
			goto again;
		}
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			/* Still absent: hash our new inode and hand it out. */
			inode->i_ino = ino;
			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		if (IS_ERR(old))
			return NULL;
		inode = old;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			/* The winner's inode got evicted: start over. */
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget_locked);
14001da177e4SLinus Torvalds 
1401ad5e195aSChristoph Hellwig /*
1402ad5e195aSChristoph Hellwig  * search the inode cache for a matching inode number.
1403ad5e195aSChristoph Hellwig  * If we find one, then the inode number we are trying to
1404ad5e195aSChristoph Hellwig  * allocate is not unique and so we should not use it.
1405ad5e195aSChristoph Hellwig  *
1406ad5e195aSChristoph Hellwig  * Returns 1 if the inode number is unique, 0 if it is not.
1407ad5e195aSChristoph Hellwig  */
test_inode_iunique(struct super_block * sb,unsigned long ino)1408ad5e195aSChristoph Hellwig static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1409ad5e195aSChristoph Hellwig {
1410ad5e195aSChristoph Hellwig 	struct hlist_head *b = inode_hashtable + hash(sb, ino);
1411ad5e195aSChristoph Hellwig 	struct inode *inode;
1412ad5e195aSChristoph Hellwig 
14133f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, b, i_hash) {
14143f19b2abSDavid Howells 		if (inode->i_ino == ino && inode->i_sb == sb)
1415ad5e195aSChristoph Hellwig 			return 0;
1416ad5e195aSChristoph Hellwig 	}
1417ad5e195aSChristoph Hellwig 	return 1;
1418ad5e195aSChristoph Hellwig }
1419ad5e195aSChristoph Hellwig 
14201da177e4SLinus Torvalds /**
14211da177e4SLinus Torvalds  *	iunique - get a unique inode number
14221da177e4SLinus Torvalds  *	@sb: superblock
14231da177e4SLinus Torvalds  *	@max_reserved: highest reserved inode number
14241da177e4SLinus Torvalds  *
14251da177e4SLinus Torvalds  *	Obtain an inode number that is unique on the system for a given
14261da177e4SLinus Torvalds  *	superblock. This is used by file systems that have no natural
14271da177e4SLinus Torvalds  *	permanent inode numbering system. An inode number is returned that
14281da177e4SLinus Torvalds  *	is higher than the reserved limit but unique.
14291da177e4SLinus Torvalds  *
14301da177e4SLinus Torvalds  *	BUGS:
14311da177e4SLinus Torvalds  *	With a large number of inodes live on the file system this function
14321da177e4SLinus Torvalds  *	currently becomes quite slow.
14331da177e4SLinus Torvalds  */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static DEFINE_SPINLOCK(iunique_lock);
	static unsigned int counter;
	ino_t res;

	rcu_read_lock();
	spin_lock(&iunique_lock);
	for (;;) {
		/* Never hand out a number at or below the reserved range. */
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
		if (test_inode_iunique(sb, res))
			break;
	}
	spin_unlock(&iunique_lock);
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(iunique);
14581da177e4SLinus Torvalds 
igrab(struct inode * inode)14591da177e4SLinus Torvalds struct inode *igrab(struct inode *inode)
14601da177e4SLinus Torvalds {
1461250df6edSDave Chinner 	spin_lock(&inode->i_lock);
1462250df6edSDave Chinner 	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
14631da177e4SLinus Torvalds 		__iget(inode);
1464250df6edSDave Chinner 		spin_unlock(&inode->i_lock);
1465250df6edSDave Chinner 	} else {
1466250df6edSDave Chinner 		spin_unlock(&inode->i_lock);
14671da177e4SLinus Torvalds 		/*
14681da177e4SLinus Torvalds 		 * Handle the case where s_op->clear_inode is not been
14691da177e4SLinus Torvalds 		 * called yet, and somebody is calling igrab
14701da177e4SLinus Torvalds 		 * while the inode is getting freed.
14711da177e4SLinus Torvalds 		 */
14721da177e4SLinus Torvalds 		inode = NULL;
1473250df6edSDave Chinner 	}
14741da177e4SLinus Torvalds 	return inode;
14751da177e4SLinus Torvalds }
14761da177e4SLinus Torvalds EXPORT_SYMBOL(igrab);
14771da177e4SLinus Torvalds 
14781da177e4SLinus Torvalds /**
147988bd5121SAnton Altaparmakov  * ilookup5_nowait - search for an inode in the inode cache
14801da177e4SLinus Torvalds  * @sb:		super block of file system to search
14811da177e4SLinus Torvalds  * @hashval:	hash value (usually inode number) to search for
14821da177e4SLinus Torvalds  * @test:	callback used for comparisons between inodes
14831da177e4SLinus Torvalds  * @data:	opaque data pointer to pass to @test
14841da177e4SLinus Torvalds  *
14850b2d0724SChristoph Hellwig  * Search for the inode specified by @hashval and @data in the inode cache.
14861da177e4SLinus Torvalds  * If the inode is in the cache, the inode is returned with an incremented
14870b2d0724SChristoph Hellwig  * reference count.
148888bd5121SAnton Altaparmakov  *
14890b2d0724SChristoph Hellwig  * Note: I_NEW is not waited upon so you have to be very careful what you do
14900b2d0724SChristoph Hellwig  * with the returned inode.  You probably should be using ilookup5() instead.
149188bd5121SAnton Altaparmakov  *
1492b6d0ad68SRandy Dunlap  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
149388bd5121SAnton Altaparmakov  */
ilookup5_nowait(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)149488bd5121SAnton Altaparmakov struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
149588bd5121SAnton Altaparmakov 		int (*test)(struct inode *, void *), void *data)
149688bd5121SAnton Altaparmakov {
149788bd5121SAnton Altaparmakov 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
14980b2d0724SChristoph Hellwig 	struct inode *inode;
149988bd5121SAnton Altaparmakov 
15000b2d0724SChristoph Hellwig 	spin_lock(&inode_hash_lock);
15010b2d0724SChristoph Hellwig 	inode = find_inode(sb, head, test, data);
15020b2d0724SChristoph Hellwig 	spin_unlock(&inode_hash_lock);
15030b2d0724SChristoph Hellwig 
1504c2b6d621SAl Viro 	return IS_ERR(inode) ? NULL : inode;
150588bd5121SAnton Altaparmakov }
150688bd5121SAnton Altaparmakov EXPORT_SYMBOL(ilookup5_nowait);
150788bd5121SAnton Altaparmakov 
150888bd5121SAnton Altaparmakov /**
150988bd5121SAnton Altaparmakov  * ilookup5 - search for an inode in the inode cache
151088bd5121SAnton Altaparmakov  * @sb:		super block of file system to search
151188bd5121SAnton Altaparmakov  * @hashval:	hash value (usually inode number) to search for
151288bd5121SAnton Altaparmakov  * @test:	callback used for comparisons between inodes
151388bd5121SAnton Altaparmakov  * @data:	opaque data pointer to pass to @test
151488bd5121SAnton Altaparmakov  *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
15191da177e4SLinus Torvalds  *
15200b2d0724SChristoph Hellwig  * This is a generalized version of ilookup() for file systems where the
15210b2d0724SChristoph Hellwig  * inode number is not sufficient for unique identification of an inode.
15221da177e4SLinus Torvalds  *
15230b2d0724SChristoph Hellwig  * Note: @test is called with the inode_hash_lock held, so can't sleep.
15241da177e4SLinus Torvalds  */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *inode;

	for (;;) {
		inode = ilookup5_nowait(sb, hashval, test, data);
		if (!inode)
			break;
		wait_on_inode(inode);
		if (likely(!inode_unhashed(inode)))
			break;
		/* Raced with eviction while waiting: drop it and retry. */
		iput(inode);
	}
	return inode;
}
EXPORT_SYMBOL(ilookup5);
15411da177e4SLinus Torvalds 
15421da177e4SLinus Torvalds /**
15431da177e4SLinus Torvalds  * ilookup - search for an inode in the inode cache
15441da177e4SLinus Torvalds  * @sb:		super block of file system to search
15451da177e4SLinus Torvalds  * @ino:	inode number to search for
15461da177e4SLinus Torvalds  *
15470b2d0724SChristoph Hellwig  * Search for the inode @ino in the inode cache, and if the inode is in the
15480b2d0724SChristoph Hellwig  * cache, the inode is returned with an incremented reference count.
15491da177e4SLinus Torvalds  */
ilookup(struct super_block * sb,unsigned long ino)15501da177e4SLinus Torvalds struct inode *ilookup(struct super_block *sb, unsigned long ino)
15511da177e4SLinus Torvalds {
15521da177e4SLinus Torvalds 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
15530b2d0724SChristoph Hellwig 	struct inode *inode;
15542864f301SAl Viro again:
15550b2d0724SChristoph Hellwig 	spin_lock(&inode_hash_lock);
15560b2d0724SChristoph Hellwig 	inode = find_inode_fast(sb, head, ino);
15570b2d0724SChristoph Hellwig 	spin_unlock(&inode_hash_lock);
15580b2d0724SChristoph Hellwig 
15592864f301SAl Viro 	if (inode) {
1560c2b6d621SAl Viro 		if (IS_ERR(inode))
1561c2b6d621SAl Viro 			return NULL;
15620b2d0724SChristoph Hellwig 		wait_on_inode(inode);
15632864f301SAl Viro 		if (unlikely(inode_unhashed(inode))) {
15642864f301SAl Viro 			iput(inode);
15652864f301SAl Viro 			goto again;
15662864f301SAl Viro 		}
15672864f301SAl Viro 	}
15680b2d0724SChristoph Hellwig 	return inode;
15691da177e4SLinus Torvalds }
15701da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup);
15711da177e4SLinus Torvalds 
1572fe032c42STheodore Ts'o /**
1573fe032c42STheodore Ts'o  * find_inode_nowait - find an inode in the inode cache
1574fe032c42STheodore Ts'o  * @sb:		super block of file system to search
1575fe032c42STheodore Ts'o  * @hashval:	hash value (usually inode number) to search for
1576fe032c42STheodore Ts'o  * @match:	callback used for comparisons between inodes
1577fe032c42STheodore Ts'o  * @data:	opaque data pointer to pass to @match
1578fe032c42STheodore Ts'o  *
1579fe032c42STheodore Ts'o  * Search for the inode specified by @hashval and @data in the inode
1580fe032c42STheodore Ts'o  * cache, where the helper function @match will return 0 if the inode
1581fe032c42STheodore Ts'o  * does not match, 1 if the inode does match, and -1 if the search
1582fe032c42STheodore Ts'o  * should be stopped.  The @match function must be responsible for
1583fe032c42STheodore Ts'o  * taking the i_lock spin_lock and checking i_state for an inode being
1584fe032c42STheodore Ts'o  * freed or being initialized, and incrementing the reference count
1585fe032c42STheodore Ts'o  * before returning 1.  It also must not sleep, since it is called with
1586fe032c42STheodore Ts'o  * the inode_hash_lock spinlock held.
1587fe032c42STheodore Ts'o  *
1588fe032c42STheodore Ts'o  * This is a even more generalized version of ilookup5() when the
1589fe032c42STheodore Ts'o  * function must never block --- find_inode() can block in
1590fe032c42STheodore Ts'o  * __wait_on_freeing_inode() --- or when the caller can not increment
1591fe032c42STheodore Ts'o  * the reference count because the resulting iput() might cause an
1592fe032c42STheodore Ts'o  * inode eviction.  The tradeoff is that the @match funtion must be
1593fe032c42STheodore Ts'o  * very carefully implemented.
1594fe032c42STheodore Ts'o  */
find_inode_nowait(struct super_block * sb,unsigned long hashval,int (* match)(struct inode *,unsigned long,void *),void * data)1595fe032c42STheodore Ts'o struct inode *find_inode_nowait(struct super_block *sb,
1596fe032c42STheodore Ts'o 				unsigned long hashval,
1597fe032c42STheodore Ts'o 				int (*match)(struct inode *, unsigned long,
1598fe032c42STheodore Ts'o 					     void *),
1599fe032c42STheodore Ts'o 				void *data)
1600fe032c42STheodore Ts'o {
1601fe032c42STheodore Ts'o 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1602fe032c42STheodore Ts'o 	struct inode *inode, *ret_inode = NULL;
1603fe032c42STheodore Ts'o 	int mval;
1604fe032c42STheodore Ts'o 
1605fe032c42STheodore Ts'o 	spin_lock(&inode_hash_lock);
1606fe032c42STheodore Ts'o 	hlist_for_each_entry(inode, head, i_hash) {
1607fe032c42STheodore Ts'o 		if (inode->i_sb != sb)
1608fe032c42STheodore Ts'o 			continue;
1609fe032c42STheodore Ts'o 		mval = match(inode, hashval, data);
1610fe032c42STheodore Ts'o 		if (mval == 0)
1611fe032c42STheodore Ts'o 			continue;
1612fe032c42STheodore Ts'o 		if (mval == 1)
1613fe032c42STheodore Ts'o 			ret_inode = inode;
1614fe032c42STheodore Ts'o 		goto out;
1615fe032c42STheodore Ts'o 	}
1616fe032c42STheodore Ts'o out:
1617fe032c42STheodore Ts'o 	spin_unlock(&inode_hash_lock);
1618fe032c42STheodore Ts'o 	return ret_inode;
1619fe032c42STheodore Ts'o }
1620fe032c42STheodore Ts'o EXPORT_SYMBOL(find_inode_nowait);
1621fe032c42STheodore Ts'o 
16223f19b2abSDavid Howells /**
16233f19b2abSDavid Howells  * find_inode_rcu - find an inode in the inode cache
16243f19b2abSDavid Howells  * @sb:		Super block of file system to search
16253f19b2abSDavid Howells  * @hashval:	Key to hash
16263f19b2abSDavid Howells  * @test:	Function to test match on an inode
16273f19b2abSDavid Howells  * @data:	Data for test function
16283f19b2abSDavid Howells  *
16293f19b2abSDavid Howells  * Search for the inode specified by @hashval and @data in the inode cache,
16303f19b2abSDavid Howells  * where the helper function @test will return 0 if the inode does not match
16313f19b2abSDavid Howells  * and 1 if it does.  The @test function must be responsible for taking the
16323f19b2abSDavid Howells  * i_lock spin_lock and checking i_state for an inode being freed or being
16333f19b2abSDavid Howells  * initialized.
16343f19b2abSDavid Howells  *
16353f19b2abSDavid Howells  * If successful, this will return the inode for which the @test function
16363f19b2abSDavid Howells  * returned 1 and NULL otherwise.
16373f19b2abSDavid Howells  *
16383f19b2abSDavid Howells  * The @test function is not permitted to take a ref on any inode presented.
16393f19b2abSDavid Howells  * It is also not permitted to sleep.
16403f19b2abSDavid Howells  *
16413f19b2abSDavid Howells  * The caller must hold the RCU read lock.
16423f19b2abSDavid Howells  */
find_inode_rcu(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)16433f19b2abSDavid Howells struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
16443f19b2abSDavid Howells 			     int (*test)(struct inode *, void *), void *data)
16453f19b2abSDavid Howells {
16463f19b2abSDavid Howells 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
16473f19b2abSDavid Howells 	struct inode *inode;
16483f19b2abSDavid Howells 
16493f19b2abSDavid Howells 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16503f19b2abSDavid Howells 			 "suspicious find_inode_rcu() usage");
16513f19b2abSDavid Howells 
16523f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, head, i_hash) {
16533f19b2abSDavid Howells 		if (inode->i_sb == sb &&
16543f19b2abSDavid Howells 		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
16553f19b2abSDavid Howells 		    test(inode, data))
16563f19b2abSDavid Howells 			return inode;
16573f19b2abSDavid Howells 	}
16583f19b2abSDavid Howells 	return NULL;
16593f19b2abSDavid Howells }
16603f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_rcu);
16613f19b2abSDavid Howells 
16623f19b2abSDavid Howells /**
1663961f3c89SMauro Carvalho Chehab  * find_inode_by_ino_rcu - Find an inode in the inode cache
16643f19b2abSDavid Howells  * @sb:		Super block of file system to search
16653f19b2abSDavid Howells  * @ino:	The inode number to match
16663f19b2abSDavid Howells  *
 * Search the inode cache for the inode with number @ino on superblock @sb,
 * skipping any inode that is being freed (I_FREEING or I_WILL_FREE).
 *
 * If successful, this will return the matching inode and NULL otherwise.
 *
 * No reference is taken on the returned inode, and the lookup does not sleep.
16783f19b2abSDavid Howells  *
16793f19b2abSDavid Howells  * The caller must hold the RCU read lock.
16803f19b2abSDavid Howells  */
find_inode_by_ino_rcu(struct super_block * sb,unsigned long ino)16813f19b2abSDavid Howells struct inode *find_inode_by_ino_rcu(struct super_block *sb,
16823f19b2abSDavid Howells 				    unsigned long ino)
16833f19b2abSDavid Howells {
16843f19b2abSDavid Howells 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
16853f19b2abSDavid Howells 	struct inode *inode;
16863f19b2abSDavid Howells 
16873f19b2abSDavid Howells 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16883f19b2abSDavid Howells 			 "suspicious find_inode_by_ino_rcu() usage");
16893f19b2abSDavid Howells 
16903f19b2abSDavid Howells 	hlist_for_each_entry_rcu(inode, head, i_hash) {
16913f19b2abSDavid Howells 		if (inode->i_ino == ino &&
16923f19b2abSDavid Howells 		    inode->i_sb == sb &&
16933f19b2abSDavid Howells 		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
16943f19b2abSDavid Howells 		    return inode;
16953f19b2abSDavid Howells 	}
16963f19b2abSDavid Howells 	return NULL;
16973f19b2abSDavid Howells }
16983f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_by_ino_rcu);
16993f19b2abSDavid Howells 
int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			/* NOTE: we leave the loop with old->i_lock held. */
			break;
		}
		if (likely(!old)) {
			/* No live collision: hash @inode as new-and-creating. */
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW | I_CREATING;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		/* Another creator got there first and hasn't finished. */
		if (unlikely(old->i_state & I_CREATING)) {
			spin_unlock(&old->i_lock);
			spin_unlock(&inode_hash_lock);
			return -EBUSY;
		}
		/* Pin the collider and wait for it to leave I_NEW. */
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			/* The old inode is still live: our number collides. */
			iput(old);
			return -EBUSY;
		}
		/* Old inode got evicted meanwhile; retry the insertion. */
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);
1746261bca86SAl Viro 
/**
 * insert_inode_locked4 - hash a freshly set up inode by a custom key
 * @inode: inode to insert
 * @hashval: hash value to insert at
 * @test: comparison callback identifying a matching inode
 * @data: opaque data passed to @test
 *
 * Like insert_inode_locked(), but matches existing inodes via @test/@data
 * instead of (i_sb, i_ino).  The insertion itself is delegated to
 * inode_insert5() with a NULL "set" callback, after marking @inode
 * I_CREATING so concurrent lookups don't hand it out half-built.
 *
 * Returns 0 on success, -EBUSY if a matching inode already exists (the
 * reference that inode_insert5() returned to it is dropped here).
 */
int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *old;

	inode->i_state |= I_CREATING;
	old = inode_insert5(inode, hashval, test, NULL, data);

	if (old != inode) {
		iput(old);
		return -EBUSY;
	}
	return 0;
}
EXPORT_SYMBOL(insert_inode_locked4);
1762261bca86SAl Viro 
17631da177e4SLinus Torvalds 
/*
 * Default ->drop_inode() helper for filesystems that never want to cache
 * unreferenced inodes: always tells iput_final() to evict immediately.
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
17691da177e4SLinus Torvalds 
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict inode, do so.  Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 *
 * Called with inode->i_lock held; the lock is dropped on every path.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	unsigned long state;
	int drop;

	WARN_ON(inode->i_state & I_NEW);

	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	/* Keep the inode cached: park it on the LRU and we're done. */
	if (!drop &&
	    !(inode->i_state & I_DONTCACHE) &&
	    (sb->s_flags & SB_ACTIVE)) {
		__inode_add_lru(inode, true);
		spin_unlock(&inode->i_lock);
		return;
	}

	state = inode->i_state;
	if (!drop) {
		/*
		 * Filesystem said "keep", but it is shutting down: write the
		 * inode back before eviction.  I_WILL_FREE is set (and later
		 * cleared) around the sleep in write_inode_now().
		 */
		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
		spin_unlock(&inode->i_lock);

		write_inode_now(inode, 1);

		spin_lock(&inode->i_lock);
		state = inode->i_state;
		WARN_ON(state & I_NEW);
		state &= ~I_WILL_FREE;
	}

	/* Point of no return: mark I_FREEING, drop off the LRU, evict. */
	WRITE_ONCE(inode->i_state, state | I_FREEING);
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}
18221da177e4SLinus Torvalds 
/**
 *	iput	- put an inode
 *	@inode: inode to put
 *
 *	Puts an inode, dropping its usage count. If the inode use count hits
 *	zero, the inode is then freed and may also be destroyed.
 *
 *	Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (!inode)
		return;
	BUG_ON(inode->i_state & I_CLEAR);
retry:
	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
			/*
			 * Lazytime: flush the deferred timestamp update
			 * before the inode can leave the cache.  Regain a
			 * reference, mark dirty, then retry the final put.
			 */
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);
			trace_writeback_lazytime_iput(inode);
			mark_inode_dirty_sync(inode);
			goto retry;
		}
		iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);
18501da177e4SLinus Torvalds 
#ifdef CONFIG_BLOCK
/**
 *	bmap	- find a block number in a file
 *	@inode:  inode owning the block number being requested
 *	@block: pointer containing the block to find
 *
 *	Replaces the value in ``*block`` with the block number on the device
 *	corresponding to the requested block number in the file.
 *	That is, asked for block 4 of inode 1 the function will replace the
 *	4 in ``*block`` with the disk block, relative to the disk start, that
 *	holds that block of the file.
 *
 *	Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
 *	hole, returns 0 and ``*block`` is also set to 0.
 */
int bmap(struct inode *inode, sector_t *block)
{
	/* Not every filesystem supports ->bmap (e.g. extent-based ones). */
	if (!inode->i_mapping->a_ops->bmap)
		return -EINVAL;

	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
#endif
18761da177e4SLinus Torvalds 
/*
 * With relative atime, only update atime if the previous atime is
 * earlier than or equal to either the ctime or mtime,
 * or if at least a day has passed since the last atime update.
 *
 * Returns 1 when atime should be updated, 0 when the update can be skipped.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
			     struct timespec64 now)
{
	struct timespec64 ctime;

	/* Without MNT_RELATIME, atime is always updated. */
	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than or equal to atime? If yes, update atime:
	 */
	if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than or equal to atime? If yes, update atime:
	 */
	ctime = inode_get_ctime(inode);
	if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}
191211ff6f05SMatthew Garrett 
/**
 * inode_update_timestamps - update the timestamps on the inode
 * @inode: inode to be updated
 * @flags: S_* flags that needed to be updated
 *
 * The update_time function is called when an inode's timestamps need to be
 * updated for a read or write operation. This function handles updating the
 * actual timestamps. It's up to the caller to ensure that the inode is marked
 * dirty appropriately.
 *
 * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
 * attempt to update all three of them. S_ATIME updates can be handled
 * independently of the rest.
 *
 * Returns a set of S_* flags indicating which values changed.
 */
int inode_update_timestamps(struct inode *inode, int flags)
{
	int updated = 0;
	struct timespec64 now;

	if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
		struct timespec64 ctime = inode_get_ctime(inode);

		/* ctime is the authoritative "now" for the mtime copy below */
		now = inode_set_ctime_current(inode);
		if (!timespec64_equal(&now, &ctime))
			updated |= S_CTIME;
		if (!timespec64_equal(&now, &inode->i_mtime)) {
			inode->i_mtime = now;
			updated |= S_MTIME;
		}
		/*
		 * Only bump i_version if something else actually changed;
		 * the helper gets the accumulated mask to decide.
		 */
		if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
			updated |= S_VERSION;
	} else {
		now = current_time(inode);
	}

	if (flags & S_ATIME) {
		if (!timespec64_equal(&now, &inode->i_atime)) {
			inode->i_atime = now;
			updated |= S_ATIME;
		}
	}
	return updated;
}
EXPORT_SYMBOL(inode_update_timestamps);
1959e20b14dbSEric Biggers 
1960541d4c79SJeff Layton /**
1961541d4c79SJeff Layton  * generic_update_time - update the timestamps on the inode
1962541d4c79SJeff Layton  * @inode: inode to be updated
1963541d4c79SJeff Layton  * @flags: S_* flags that needed to be updated
1964541d4c79SJeff Layton  *
1965541d4c79SJeff Layton  * The update_time function is called when an inode's timestamps need to be
1966541d4c79SJeff Layton  * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
1967541d4c79SJeff Layton  * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
1968541d4c79SJeff Layton  * updates can be handled done independently of the rest.
1969541d4c79SJeff Layton  *
1970541d4c79SJeff Layton  * Returns a S_* mask indicating which fields were updated.
1971541d4c79SJeff Layton  */
generic_update_time(struct inode * inode,int flags)1972541d4c79SJeff Layton int generic_update_time(struct inode *inode, int flags)
1973541d4c79SJeff Layton {
1974541d4c79SJeff Layton 	int updated = inode_update_timestamps(inode, flags);
1975541d4c79SJeff Layton 	int dirty_flags = 0;
1976541d4c79SJeff Layton 
1977541d4c79SJeff Layton 	if (updated & (S_ATIME|S_MTIME|S_CTIME))
1978541d4c79SJeff Layton 		dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
1979541d4c79SJeff Layton 	if (updated & S_VERSION)
1980541d4c79SJeff Layton 		dirty_flags |= I_DIRTY_SYNC;
1981e20b14dbSEric Biggers 	__mark_inode_dirty(inode, dirty_flags);
1982541d4c79SJeff Layton 	return updated;
1983c3b2da31SJosef Bacik }
19840ae45f63STheodore Ts'o EXPORT_SYMBOL(generic_update_time);
19850ae45f63STheodore Ts'o 
19860ae45f63STheodore Ts'o /*
19870ae45f63STheodore Ts'o  * This does the actual work of updating an inodes time or version.  Must have
19880ae45f63STheodore Ts'o  * had called mnt_want_write() before calling this.
19890ae45f63STheodore Ts'o  */
inode_update_time(struct inode * inode,int flags)1990913e9928SJeff Layton int inode_update_time(struct inode *inode, int flags)
19910ae45f63STheodore Ts'o {
199223b424d9SDeepa Dinamani 	if (inode->i_op->update_time)
1993913e9928SJeff Layton 		return inode->i_op->update_time(inode, flags);
1994541d4c79SJeff Layton 	generic_update_time(inode, flags);
1995541d4c79SJeff Layton 	return 0;
19960ae45f63STheodore Ts'o }
1997e60feb44SJosef Bacik EXPORT_SYMBOL(inode_update_time);
1998c3b2da31SJosef Bacik 
/**
 *	atime_needs_update	-	check whether the access time needs updating
 *	@path: the &struct path being accessed
 *	@inode: inode being accessed
 *
 *	Tell whether touch_atime() should actually update the accessed time
 *	on an inode.  This function automatically handles read only file
 *	systems and media, as well as the "noatime" flag and inode specific
 *	"noatime" markers.
 */
bool atime_needs_update(const struct path *path, struct inode *inode)
{
	struct vfsmount *mnt = path->mnt;
	struct timespec64 now;

	if (inode->i_flags & S_NOATIME)
		return false;

	/* Atime updates will likely cause i_uid and i_gid to be written
	 * back improperly if their true value is unknown to the vfs.
	 */
	if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
		return false;

	if (IS_NOATIME(inode))
		return false;
	if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
		return false;

	/* Mount-level noatime/nodiratime override per-inode settings. */
	if (mnt->mnt_flags & MNT_NOATIME)
		return false;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return false;

	now = current_time(inode);

	if (!relatime_need_update(mnt, inode, now))
		return false;

	/* No point dirtying the inode if atime wouldn't actually change. */
	if (timespec64_equal(&inode->i_atime, &now))
		return false;

	return true;
}
20428fa9dd24SNeilBrown 
/**
 *	touch_atime	-	update the access time
 *	@path: path of the inode whose atime is to be updated
 *
 *	Update the accessed time on an inode and mark it for writeback,
 *	subject to all the checks in atime_needs_update().  Takes (and
 *	releases) sb-level write access and mount write access around the
 *	actual update.
 */
void touch_atime(const struct path *path)
{
	struct vfsmount *mnt = path->mnt;
	struct inode *inode = d_inode(path->dentry);

	if (!atime_needs_update(path, inode))
		return;

	/* Frozen or read-only superblock: silently skip the update. */
	if (!sb_start_write_trylock(inode->i_sb))
		return;

	if (__mnt_want_write(mnt) != 0)
		goto skip_update;
	/*
	 * File systems can error out when updating inodes if they need to
	 * allocate new space to modify an inode (such is the case for
	 * Btrfs), but since we touch atime while walking down the path we
	 * really don't care if we failed to update the atime of the file,
	 * so just ignore the return value.
	 * We may also fail on filesystems that have the ability to make parts
	 * of the fs read only, e.g. subvolumes in Btrfs.
	 */
	inode_update_time(inode, S_ATIME);
	__mnt_drop_write(mnt);
skip_update:
	sb_end_write(inode->i_sb);
}
EXPORT_SYMBOL(touch_atime);
20711da177e4SLinus Torvalds 
20723ed37648SCong Wang /*
2073dbfae0cdSJan Kara  * Return mask of changes for notify_change() that need to be done as a
2074dbfae0cdSJan Kara  * response to write or truncate. Return 0 if nothing has to be changed.
2075dbfae0cdSJan Kara  * Negative value on error (change should be denied).
2076dbfae0cdSJan Kara  */
dentry_needs_remove_privs(struct mnt_idmap * idmap,struct dentry * dentry)20779452e93eSChristian Brauner int dentry_needs_remove_privs(struct mnt_idmap *idmap,
2078ed5a7047SChristian Brauner 			      struct dentry *dentry)
2079dbfae0cdSJan Kara {
2080dbfae0cdSJan Kara 	struct inode *inode = d_inode(dentry);
2081dbfae0cdSJan Kara 	int mask = 0;
2082dbfae0cdSJan Kara 	int ret;
2083dbfae0cdSJan Kara 
2084dbfae0cdSJan Kara 	if (IS_NOSEC(inode))
2085dbfae0cdSJan Kara 		return 0;
2086dbfae0cdSJan Kara 
20879452e93eSChristian Brauner 	mask = setattr_should_drop_suidgid(idmap, inode);
2088dbfae0cdSJan Kara 	ret = security_inode_need_killpriv(dentry);
2089dbfae0cdSJan Kara 	if (ret < 0)
2090dbfae0cdSJan Kara 		return ret;
2091dbfae0cdSJan Kara 	if (ret)
2092dbfae0cdSJan Kara 		mask |= ATTR_KILL_PRIV;
2093dbfae0cdSJan Kara 	return mask;
2094dbfae0cdSJan Kara }
2095dbfae0cdSJan Kara 
__remove_privs(struct mnt_idmap * idmap,struct dentry * dentry,int kill)2096abf08576SChristian Brauner static int __remove_privs(struct mnt_idmap *idmap,
2097643fe55aSChristian Brauner 			  struct dentry *dentry, int kill)
20983ed37648SCong Wang {
20993ed37648SCong Wang 	struct iattr newattrs;
21003ed37648SCong Wang 
21013ed37648SCong Wang 	newattrs.ia_valid = ATTR_FORCE | kill;
210227ac0ffeSJ. Bruce Fields 	/*
210327ac0ffeSJ. Bruce Fields 	 * Note we call this on write, so notify_change will not
210427ac0ffeSJ. Bruce Fields 	 * encounter any conflicting delegations:
210527ac0ffeSJ. Bruce Fields 	 */
2106abf08576SChristian Brauner 	return notify_change(idmap, dentry, &newattrs, NULL);
21073ed37648SCong Wang }
21083ed37648SCong Wang 
/*
 * Strip special file privileges (suid/sgid, security capabilities) from
 * @file if a write requires it.  @flags are kiocb flags: with IOCB_NOWAIT
 * set, return -EAGAIN instead of blocking in notify_change().
 * Returns 0 on success (including "nothing to do"), negative errno on
 * failure.
 */
static int __file_remove_privs(struct file *file, unsigned int flags)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = file_inode(file);
	int error = 0;
	int kill;

	/* Only regular files without the nosec shortcut need checking. */
	if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
		return 0;

	kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
	if (kill < 0)
		return kill;

	if (kill) {
		if (flags & IOCB_NOWAIT)
			return -EAGAIN;

		error = __remove_privs(file_mnt_idmap(file), dentry, kill);
	}

	if (!error)
		inode_has_no_xattr(inode);
	return error;
}
2134faf99b56SStefan Roesch 
/**
 * file_remove_privs - remove special file privileges (suid, capabilities)
 * @file: file to remove privileges from
 *
 * When file is modified by a write or truncation ensure that special
 * file privileges are removed.
 *
 * This is the blocking variant: it never returns -EAGAIN for contention
 * (flags = 0, i.e. no IOCB_NOWAIT).
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_remove_privs(struct file *file)
{
	return __file_remove_privs(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);
21493ed37648SCong Wang 
/*
 * Return the mask of S_* flags (S_MTIME, S_CTIME, S_VERSION) that must be
 * updated on @inode for a modification "now", or 0 when no update is
 * needed (including the IS_NOCMTIME case).
 */
static int inode_needs_update_time(struct inode *inode)
{
	int sync_it = 0;
	struct timespec64 now = current_time(inode);
	struct timespec64 ctime;

	/* First try to exhaust all avenues to not sync */
	if (IS_NOCMTIME(inode))
		return 0;

	if (!timespec64_equal(&inode->i_mtime, &now))
		sync_it = S_MTIME;

	ctime = inode_get_ctime(inode);
	if (!timespec64_equal(&ctime, &now))
		sync_it |= S_CTIME;

	/* i_version only needs bumping if someone has queried it. */
	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
		sync_it |= S_VERSION;

	return sync_it;
}
2172ed97bd37SAndreas Mohr 
/*
 * Apply the timestamp updates selected by @sync_mode (an S_* mask from
 * inode_needs_update_time()), taking mount write access for the duration.
 * If write access cannot be obtained the update is silently skipped and
 * 0 is returned.
 */
static int __file_update_time(struct file *file, int sync_mode)
{
	int ret = 0;
	struct inode *inode = file_inode(file);

	/* try to update time settings */
	if (!__mnt_want_write_file(file)) {
		ret = inode_update_time(inode, sync_mode);
		__mnt_drop_write_file(file);
	}

	return ret;
}
21866a2aa5d8SStefan Roesch 
/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback. Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the _NOCMTIME inode
 * flag, e.g. for network filesystem where these timestamps are handled
 * by the server. This can return an error for file systems who need to
 * allocate space in order to update an inode.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_update_time(struct file *file)
{
	int ret;
	struct inode *inode = file_inode(file);

	/* ret is either 0, a negative errno, or the S_* mask to apply. */
	ret = inode_needs_update_time(inode);
	if (ret <= 0)
		return ret;

	return __file_update_time(file, ret);
}
EXPORT_SYMBOL(file_update_time);
22131da177e4SLinus Torvalds 
/**
 * file_modified_flags - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 * @flags: kiocb flags
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * If IOCB_NOWAIT is set, special file privileges will not be removed and
 * time settings will not be updated. It will return -EAGAIN.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int file_modified_flags(struct file *file, int flags)
{
	int ret;
	struct inode *inode = file_inode(file);

	/*
	 * Clear the security bits if the process is not being run by root.
	 * This keeps people from modifying setuid and setgid binaries.
	 */
	ret = __file_remove_privs(file, flags);
	if (ret)
		return ret;

	/* O_NOCMTIME opens skip all timestamp handling. */
	if (unlikely(file->f_mode & FMODE_NOCMTIME))
		return 0;

	/* ret is either 0, a negative errno, or the S_* mask to apply. */
	ret = inode_needs_update_time(inode);
	if (ret <= 0)
		return ret;
	if (flags & IOCB_NOWAIT)
		return -EAGAIN;

	return __file_update_time(file, ret);
}
225366fa3cedSStefan Roesch 
/**
 * file_modified - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Blocking variant: flags = 0, so IOCB_NOWAIT semantics never apply.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_modified(struct file *file)
{
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
2270e38f7f53SAmir Goldstein 
/**
 * kiocb_modified - handle mandated vfs changes when modifying a file
 * @iocb: iocb that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Honors the iocb's ki_flags, so IOCB_NOWAIT requests get -EAGAIN
 * instead of blocking.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int kiocb_modified(struct kiocb *iocb)
{
	return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
}
EXPORT_SYMBOL_GPL(kiocb_modified);
228766fa3cedSStefan Roesch 
inode_needs_sync(struct inode * inode)22881da177e4SLinus Torvalds int inode_needs_sync(struct inode *inode)
22891da177e4SLinus Torvalds {
22901da177e4SLinus Torvalds 	if (IS_SYNC(inode))
22911da177e4SLinus Torvalds 		return 1;
22921da177e4SLinus Torvalds 	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
22931da177e4SLinus Torvalds 		return 1;
22941da177e4SLinus Torvalds 	return 0;
22951da177e4SLinus Torvalds }
22961da177e4SLinus Torvalds EXPORT_SYMBOL(inode_needs_sync);
22971da177e4SLinus Torvalds 
/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 *
 * Called with both inode->i_lock and inode_hash_lock held; drops both to
 * sleep and retakes inode_hash_lock (but not i_lock) before returning.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
	spin_lock(&inode_hash_lock);
}
23211da177e4SLinus Torvalds 
23221da177e4SLinus Torvalds static __initdata unsigned long ihash_entries;
set_ihash_entries(char * str)23231da177e4SLinus Torvalds static int __init set_ihash_entries(char *str)
23241da177e4SLinus Torvalds {
23251da177e4SLinus Torvalds 	if (!str)
23261da177e4SLinus Torvalds 		return 0;
23271da177e4SLinus Torvalds 	ihash_entries = simple_strtoul(str, &str, 0);
23281da177e4SLinus Torvalds 	return 1;
23291da177e4SLinus Torvalds }
23301da177e4SLinus Torvalds __setup("ihash_entries=", set_ihash_entries);
23311da177e4SLinus Torvalds 
/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,	/* from "ihash_entries=", if given */
					14,
					HASH_EARLY | HASH_ZERO,	/* memblock-time, zeroed buckets */
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}
23541da177e4SLinus Torvalds 
/*
 * Late (slab-available) boot init: create the inode slab cache, and
 * allocate the inode hash table if inode_init_early() deferred it
 * because hashdist was set.
 */
void __init inode_init(void)
{
	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					 init_once);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,	/* from "ihash_entries=", if given */
					14,
					HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}
23801da177e4SLinus Torvalds 
/*
 * init_special_inode - set up an inode representing a special file
 * (character device, block device, FIFO or socket) according to @mode,
 * recording @rdev for the two device-node cases.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	switch (mode & S_IFMT) {
	case S_IFCHR:
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFBLK:
		if (IS_ENABLED(CONFIG_BLOCK))
			inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFIFO:
		inode->i_fop = &pipefifo_fops;
		break;
	case S_IFSOCK:
		/* leave it no_open_fops */
		break;
	default:
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
	}
}
EXPORT_SYMBOL(init_special_inode);
2401a1bd120dSDmitry Monakhov 
2402a1bd120dSDmitry Monakhov /**
2403eaae668dSBen Hutchings  * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
2404f2d40141SChristian Brauner  * @idmap: idmap of the mount the inode was created from
2405a1bd120dSDmitry Monakhov  * @inode: New inode
2406a1bd120dSDmitry Monakhov  * @dir: Directory inode
2407a1bd120dSDmitry Monakhov  * @mode: mode of the new inode
240821cb47beSChristian Brauner  *
2409f2d40141SChristian Brauner  * If the inode has been created through an idmapped mount the idmap of
2410f2d40141SChristian Brauner  * the vfsmount must be passed through @idmap. This function will then take
2411f2d40141SChristian Brauner  * care to map the inode according to @idmap before checking permissions
241221cb47beSChristian Brauner  * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
2413f2d40141SChristian Brauner  * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
2414a1bd120dSDmitry Monakhov  */
inode_init_owner(struct mnt_idmap * idmap,struct inode * inode,const struct inode * dir,umode_t mode)2415f2d40141SChristian Brauner void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
241621cb47beSChristian Brauner 		      const struct inode *dir, umode_t mode)
2417a1bd120dSDmitry Monakhov {
2418c14329d3SChristian Brauner 	inode_fsuid_set(inode, idmap);
2419a1bd120dSDmitry Monakhov 	if (dir && dir->i_mode & S_ISGID) {
2420a1bd120dSDmitry Monakhov 		inode->i_gid = dir->i_gid;
24210fa3ecd8SLinus Torvalds 
24220fa3ecd8SLinus Torvalds 		/* Directories are special, and always inherit S_ISGID */
2423a1bd120dSDmitry Monakhov 		if (S_ISDIR(mode))
2424a1bd120dSDmitry Monakhov 			mode |= S_ISGID;
2425a1bd120dSDmitry Monakhov 	} else
2426c14329d3SChristian Brauner 		inode_fsgid_set(inode, idmap);
2427a1bd120dSDmitry Monakhov 	inode->i_mode = mode;
2428a1bd120dSDmitry Monakhov }
2429a1bd120dSDmitry Monakhov EXPORT_SYMBOL(inode_init_owner);
2430e795b717SSerge E. Hallyn 
24312e149670SSerge E. Hallyn /**
24322e149670SSerge E. Hallyn  * inode_owner_or_capable - check current task permissions to inode
243301beba79SChristian Brauner  * @idmap: idmap of the mount the inode was found from
24342e149670SSerge E. Hallyn  * @inode: inode being checked
24352e149670SSerge E. Hallyn  *
243623adbe12SAndy Lutomirski  * Return true if current either has CAP_FOWNER in a namespace with the
243723adbe12SAndy Lutomirski  * inode owner uid mapped, or owns the file.
243821cb47beSChristian Brauner  *
243901beba79SChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
244001beba79SChristian Brauner  * the vfsmount must be passed through @idmap. This function will then take
244101beba79SChristian Brauner  * care to map the inode according to @idmap before checking permissions.
244221cb47beSChristian Brauner  * On non-idmapped mounts or if permission checking is to be performed on the
244301beba79SChristian Brauner  * raw inode simply passs @nop_mnt_idmap.
2444e795b717SSerge E. Hallyn  */
inode_owner_or_capable(struct mnt_idmap * idmap,const struct inode * inode)244501beba79SChristian Brauner bool inode_owner_or_capable(struct mnt_idmap *idmap,
244621cb47beSChristian Brauner 			    const struct inode *inode)
2447e795b717SSerge E. Hallyn {
2448a2bd096fSChristian Brauner 	vfsuid_t vfsuid;
244923adbe12SAndy Lutomirski 	struct user_namespace *ns;
245023adbe12SAndy Lutomirski 
2451e67fe633SChristian Brauner 	vfsuid = i_uid_into_vfsuid(idmap, inode);
2452a2bd096fSChristian Brauner 	if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
2453e795b717SSerge E. Hallyn 		return true;
245423adbe12SAndy Lutomirski 
245523adbe12SAndy Lutomirski 	ns = current_user_ns();
2456a2bd096fSChristian Brauner 	if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
2457e795b717SSerge E. Hallyn 		return true;
2458e795b717SSerge E. Hallyn 	return false;
2459e795b717SSerge E. Hallyn }
24602e149670SSerge E. Hallyn EXPORT_SYMBOL(inode_owner_or_capable);
24611d59d61fSTrond Myklebust 
/*
 * Direct i/o helper functions
 */
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		/*
		 * Register on the waitqueue before re-checking i_dio_count,
		 * so a wakeup between the check and schedule() isn't lost.
		 */
		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wq_entry);
}
24771d59d61fSTrond Myklebust 
24781d59d61fSTrond Myklebust /**
24791d59d61fSTrond Myklebust  * inode_dio_wait - wait for outstanding DIO requests to finish
24801d59d61fSTrond Myklebust  * @inode: inode to wait for
24811d59d61fSTrond Myklebust  *
24821d59d61fSTrond Myklebust  * Waits for all pending direct I/O requests to finish so that we can
24831d59d61fSTrond Myklebust  * proceed with a truncate or equivalent operation.
24841d59d61fSTrond Myklebust  *
24851d59d61fSTrond Myklebust  * Must be called under a lock that serializes taking new references
24861d59d61fSTrond Myklebust  * to i_dio_count, usually by inode->i_mutex.
24871d59d61fSTrond Myklebust  */
inode_dio_wait(struct inode * inode)24881d59d61fSTrond Myklebust void inode_dio_wait(struct inode *inode)
24891d59d61fSTrond Myklebust {
24901d59d61fSTrond Myklebust 	if (atomic_read(&inode->i_dio_count))
24911d59d61fSTrond Myklebust 		__inode_dio_wait(inode);
24921d59d61fSTrond Myklebust }
24931d59d61fSTrond Myklebust EXPORT_SYMBOL(inode_dio_wait);
24941d59d61fSTrond Myklebust 
/*
 * inode_set_flags - atomically set some inode flags
 *
 * Note: the caller should be holding i_mutex, or else be sure that
 * they have exclusive access to the inode structure (i.e., while the
 * inode is being instantiated).  The reason for the cmpxchg() loop
 * --- which wouldn't be necessary if all code paths which modify
 * i_flags actually followed this rule, is that there is at least one
 * code path which doesn't today so we use cmpxchg() out of an abundance
 * of caution.
 *
 * In the long run, i_mutex is overkill, and we should probably look
 * at using the i_lock spinlock to protect i_flags, and then make sure
 * it is so documented in include/linux/fs.h and that all code follows
 * the locking convention!!
 */
void inode_set_flags(struct inode *inode, unsigned int flags,
		     unsigned int mask)
{
	/* @flags must be a subset of @mask; bits outside it are a caller bug. */
	WARN_ON_ONCE(flags & ~mask);
	/* Atomically replace the @mask bits of i_flags with @flags. */
	set_mask_bits(&inode->i_flags, mask, flags);
}
EXPORT_SYMBOL(inode_set_flags);
251821fc61c7SAl Viro 
/*
 * inode_nohighmem - restrict this inode's page-cache allocations to
 * GFP_USER, so the mapping never uses highmem pages.
 */
void inode_nohighmem(struct inode *inode)
{
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);
25243cd88666SDeepa Dinamani 
25253cd88666SDeepa Dinamani /**
252650e17c00SDeepa Dinamani  * timestamp_truncate - Truncate timespec to a granularity
252750e17c00SDeepa Dinamani  * @t: Timespec
252850e17c00SDeepa Dinamani  * @inode: inode being updated
252950e17c00SDeepa Dinamani  *
253050e17c00SDeepa Dinamani  * Truncate a timespec to the granularity supported by the fs
253150e17c00SDeepa Dinamani  * containing the inode. Always rounds down. gran must
253250e17c00SDeepa Dinamani  * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
253350e17c00SDeepa Dinamani  */
timestamp_truncate(struct timespec64 t,struct inode * inode)253450e17c00SDeepa Dinamani struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
253550e17c00SDeepa Dinamani {
253650e17c00SDeepa Dinamani 	struct super_block *sb = inode->i_sb;
253750e17c00SDeepa Dinamani 	unsigned int gran = sb->s_time_gran;
253850e17c00SDeepa Dinamani 
253950e17c00SDeepa Dinamani 	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
254050e17c00SDeepa Dinamani 	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
254150e17c00SDeepa Dinamani 		t.tv_nsec = 0;
254250e17c00SDeepa Dinamani 
254350e17c00SDeepa Dinamani 	/* Avoid division in the common cases 1 ns and 1 s. */
254450e17c00SDeepa Dinamani 	if (gran == 1)
254550e17c00SDeepa Dinamani 		; /* nothing */
254650e17c00SDeepa Dinamani 	else if (gran == NSEC_PER_SEC)
254750e17c00SDeepa Dinamani 		t.tv_nsec = 0;
254850e17c00SDeepa Dinamani 	else if (gran > 1 && gran < NSEC_PER_SEC)
254950e17c00SDeepa Dinamani 		t.tv_nsec -= t.tv_nsec % gran;
255050e17c00SDeepa Dinamani 	else
255150e17c00SDeepa Dinamani 		WARN(1, "invalid file time granularity: %u", gran);
255250e17c00SDeepa Dinamani 	return t;
255350e17c00SDeepa Dinamani }
255450e17c00SDeepa Dinamani EXPORT_SYMBOL(timestamp_truncate);
255550e17c00SDeepa Dinamani 
/**
 * current_time - Return FS time
 * @inode: inode.
 *
 * Return the current time truncated to the time granularity supported by
 * the fs.
 *
 * Note that inode and inode->sb cannot be NULL.
 * Otherwise, the function warns and returns time without truncation.
 */
struct timespec64 current_time(struct inode *inode)
{
	struct timespec64 now;

	/*
	 * Coarse (tick-resolution) clock is sufficient here since the
	 * result is truncated to the fs time granularity below anyway.
	 */
	ktime_get_coarse_real_ts64(&now);
	return timestamp_truncate(now, inode);
}
EXPORT_SYMBOL(current_time);
25742b3416ceSYang Xu 
25752b3416ceSYang Xu /**
25769b6304c1SJeff Layton  * inode_set_ctime_current - set the ctime to current_time
25779b6304c1SJeff Layton  * @inode: inode
25789b6304c1SJeff Layton  *
25799b6304c1SJeff Layton  * Set the inode->i_ctime to the current value for the inode. Returns
25809b6304c1SJeff Layton  * the current value that was assigned to i_ctime.
25819b6304c1SJeff Layton  */
inode_set_ctime_current(struct inode * inode)25829b6304c1SJeff Layton struct timespec64 inode_set_ctime_current(struct inode *inode)
25839b6304c1SJeff Layton {
2584647aa768SChristian Brauner 	struct timespec64 now = current_time(inode);
25859b6304c1SJeff Layton 
2586647aa768SChristian Brauner 	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
25879b6304c1SJeff Layton 	return now;
25889b6304c1SJeff Layton }
25899b6304c1SJeff Layton EXPORT_SYMBOL(inode_set_ctime_current);
25909b6304c1SJeff Layton 
25919b6304c1SJeff Layton /**
259211c2a870SChristian Brauner  * in_group_or_capable - check whether caller is CAP_FSETID privileged
25939452e93eSChristian Brauner  * @idmap:	idmap of the mount @inode was found from
259411c2a870SChristian Brauner  * @inode:	inode to check
259511c2a870SChristian Brauner  * @vfsgid:	the new/current vfsgid of @inode
259611c2a870SChristian Brauner  *
259711c2a870SChristian Brauner  * Check wether @vfsgid is in the caller's group list or if the caller is
259811c2a870SChristian Brauner  * privileged with CAP_FSETID over @inode. This can be used to determine
259911c2a870SChristian Brauner  * whether the setgid bit can be kept or must be dropped.
260011c2a870SChristian Brauner  *
260111c2a870SChristian Brauner  * Return: true if the caller is sufficiently privileged, false if not.
260211c2a870SChristian Brauner  */
in_group_or_capable(struct mnt_idmap * idmap,const struct inode * inode,vfsgid_t vfsgid)26039452e93eSChristian Brauner bool in_group_or_capable(struct mnt_idmap *idmap,
260411c2a870SChristian Brauner 			 const struct inode *inode, vfsgid_t vfsgid)
260511c2a870SChristian Brauner {
260611c2a870SChristian Brauner 	if (vfsgid_in_group_p(vfsgid))
260711c2a870SChristian Brauner 		return true;
26089452e93eSChristian Brauner 	if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
260911c2a870SChristian Brauner 		return true;
261011c2a870SChristian Brauner 	return false;
261111c2a870SChristian Brauner }
261211c2a870SChristian Brauner 
261311c2a870SChristian Brauner /**
26142b3416ceSYang Xu  * mode_strip_sgid - handle the sgid bit for non-directories
26159452e93eSChristian Brauner  * @idmap: idmap of the mount the inode was created from
26162b3416ceSYang Xu  * @dir: parent directory inode
26172b3416ceSYang Xu  * @mode: mode of the file to be created in @dir
26182b3416ceSYang Xu  *
26192b3416ceSYang Xu  * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
26202b3416ceSYang Xu  * raised and @dir has the S_ISGID bit raised ensure that the caller is
26212b3416ceSYang Xu  * either in the group of the parent directory or they have CAP_FSETID
26222b3416ceSYang Xu  * in their user namespace and are privileged over the parent directory.
26232b3416ceSYang Xu  * In all other cases, strip the S_ISGID bit from @mode.
26242b3416ceSYang Xu  *
26252b3416ceSYang Xu  * Return: the new mode to use for the file
26262b3416ceSYang Xu  */
mode_strip_sgid(struct mnt_idmap * idmap,const struct inode * dir,umode_t mode)26279452e93eSChristian Brauner umode_t mode_strip_sgid(struct mnt_idmap *idmap,
26282b3416ceSYang Xu 			const struct inode *dir, umode_t mode)
26292b3416ceSYang Xu {
26302b3416ceSYang Xu 	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
26312b3416ceSYang Xu 		return mode;
26322b3416ceSYang Xu 	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
26332b3416ceSYang Xu 		return mode;
2634e67fe633SChristian Brauner 	if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
26352b3416ceSYang Xu 		return mode;
26362b3416ceSYang Xu 	return mode & ~S_ISGID;
26372b3416ceSYang Xu }
26382b3416ceSYang Xu EXPORT_SYMBOL(mode_strip_sgid);
2639