1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * (C) 1997 Linus Torvalds
44b4563dcSChristoph Hellwig * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
51da177e4SLinus Torvalds */
6e59cc473SAl Viro #include <linux/export.h>
71da177e4SLinus Torvalds #include <linux/fs.h>
85970e15dSJeff Layton #include <linux/filelock.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/backing-dev.h>
111da177e4SLinus Torvalds #include <linux/hash.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/security.h>
141da177e4SLinus Torvalds #include <linux/cdev.h>
1557c8a661SMike Rapoport #include <linux/memblock.h>
163be25f49SEric Paris #include <linux/fsnotify.h>
17fc33a7bbSChristoph Hellwig #include <linux/mount.h>
18f19d4a8fSAl Viro #include <linux/posix_acl.h>
194b4563dcSChristoph Hellwig #include <linux/buffer_head.h> /* for inode_has_buffers */
207ada4db8SMiklos Szeredi #include <linux/ratelimit.h>
21bc3b14cbSDave Chinner #include <linux/list_lru.h>
22ae5e165dSJeff Layton #include <linux/iversion.h>
230ae45f63STheodore Ts'o #include <trace/events/writeback.h>
24a66979abSDave Chinner #include "internal.h"
251da177e4SLinus Torvalds
261da177e4SLinus Torvalds /*
274b4563dcSChristoph Hellwig * Inode locking rules:
28250df6edSDave Chinner *
29250df6edSDave Chinner * inode->i_lock protects:
3010e14073SJchao Sun * inode->i_state, inode->i_hash, __iget(), inode->i_io_list
31bc3b14cbSDave Chinner * Inode LRU list locks protect:
3298b745c6SDave Chinner * inode->i_sb->s_inode_lru, inode->i_lru
3374278da9SDave Chinner * inode->i_sb->s_inode_list_lock protects:
3474278da9SDave Chinner * inode->i_sb->s_inodes, inode->i_sb_list
35f758eeabSChristoph Hellwig * bdi->wb.list_lock protects:
36c7f54084SDave Chinner * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
3767a23c49SDave Chinner * inode_hash_lock protects:
3867a23c49SDave Chinner * inode_hashtable, inode->i_hash
39250df6edSDave Chinner *
40250df6edSDave Chinner * Lock ordering:
4155fa6091SDave Chinner *
4274278da9SDave Chinner * inode->i_sb->s_inode_list_lock
4355fa6091SDave Chinner * inode->i_lock
44bc3b14cbSDave Chinner * Inode LRU list locks
45a66979abSDave Chinner *
46f758eeabSChristoph Hellwig * bdi->wb.list_lock
47a66979abSDave Chinner * inode->i_lock
4867a23c49SDave Chinner *
4967a23c49SDave Chinner * inode_hash_lock
5074278da9SDave Chinner * inode->i_sb->s_inode_list_lock
5167a23c49SDave Chinner * inode->i_lock
5267a23c49SDave Chinner *
5367a23c49SDave Chinner * iunique_lock
5467a23c49SDave Chinner * inode_hash_lock
55250df6edSDave Chinner */
56250df6edSDave Chinner
57fa3536ccSEric Dumazet static unsigned int i_hash_mask __read_mostly;
58fa3536ccSEric Dumazet static unsigned int i_hash_shift __read_mostly;
5967a23c49SDave Chinner static struct hlist_head *inode_hashtable __read_mostly;
6067a23c49SDave Chinner static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
611da177e4SLinus Torvalds
621da177e4SLinus Torvalds /*
637dcda1c9SJens Axboe * Empty aops. Can be used for the cases where the user does not
647dcda1c9SJens Axboe * define any of the address_space operations.
657dcda1c9SJens Axboe */
667dcda1c9SJens Axboe const struct address_space_operations empty_aops = {
677dcda1c9SJens Axboe };
687dcda1c9SJens Axboe EXPORT_SYMBOL(empty_aops);
697dcda1c9SJens Axboe
703942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_inodes);
713942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_unused);
72cffbc8aaSDave Chinner
73e18b890bSChristoph Lameter static struct kmem_cache *inode_cachep __read_mostly;
741da177e4SLinus Torvalds
get_nr_inodes(void)753942c07cSGlauber Costa static long get_nr_inodes(void)
76cffbc8aaSDave Chinner {
773e880fb5SNick Piggin int i;
783942c07cSGlauber Costa long sum = 0;
793e880fb5SNick Piggin for_each_possible_cpu(i)
803e880fb5SNick Piggin sum += per_cpu(nr_inodes, i);
813e880fb5SNick Piggin return sum < 0 ? 0 : sum;
82cffbc8aaSDave Chinner }
83cffbc8aaSDave Chinner
get_nr_inodes_unused(void)843942c07cSGlauber Costa static inline long get_nr_inodes_unused(void)
85cffbc8aaSDave Chinner {
86fcb94f72SDave Chinner int i;
873942c07cSGlauber Costa long sum = 0;
88fcb94f72SDave Chinner for_each_possible_cpu(i)
89fcb94f72SDave Chinner sum += per_cpu(nr_unused, i);
90fcb94f72SDave Chinner return sum < 0 ? 0 : sum;
91cffbc8aaSDave Chinner }
92cffbc8aaSDave Chinner
long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();

	if (nr_dirty < 0)
		nr_dirty = 0;
	return nr_dirty;
}
99cffbc8aaSDave Chinner
100cffbc8aaSDave Chinner /*
101cffbc8aaSDave Chinner * Handle nr_inode sysctl
102cffbc8aaSDave Chinner */
103cffbc8aaSDave Chinner #ifdef CONFIG_SYSCTL
1041d67fe58SLuis Chamberlain /*
1051d67fe58SLuis Chamberlain * Statistics gathering..
1061d67fe58SLuis Chamberlain */
1071d67fe58SLuis Chamberlain static struct inodes_stat_t inodes_stat;
1081d67fe58SLuis Chamberlain
/*
 * Sysctl handler for fs/inode-nr and fs/inode-state: refresh the snapshot
 * in inodes_stat from the per-cpu counters, then let the generic ulong
 * handler copy it out to userspace.
 */
static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
1161d67fe58SLuis Chamberlain
/* Read-only sysctl entries exposing inode statistics under /proc/sys/fs. */
static struct ctl_table inodes_sysctls[] = {
	{
		/* fs/inode-nr: nr_inodes and nr_unused only (2 longs) */
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		/* fs/inode-state: the full inodes_stat_t layout (7 longs) */
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{ }
};
1341d67fe58SLuis Chamberlain
/* Register the fs/ inode sysctls early in boot. */
static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
141cffbc8aaSDave Chinner #endif
142cffbc8aaSDave Chinner
/*
 * Default ->open() for freshly initialised inodes (see no_open_fops in
 * inode_init_always()): refuse until a real i_fop is installed.
 */
static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}
147bd9b51e7SAl Viro
1482cb1599fSDavid Chinner /**
1496e7c2b4dSMasahiro Yamada * inode_init_always - perform inode structure initialisation
1500bc02f3fSRandy Dunlap * @sb: superblock inode belongs to
1510bc02f3fSRandy Dunlap * @inode: inode to initialise
1522cb1599fSDavid Chinner *
1532cb1599fSDavid Chinner * These are initializations that need to be done on every inode
1542cb1599fSDavid Chinner * allocation as the fields are not initialised by slab allocation.
1552cb1599fSDavid Chinner */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	/* default file ops reject open() until the fs installs real ones */
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	/* new inode starts with a single reference held by the caller */
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	/* reset foreign-writeback detection state */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	/* set up the embedded address_space */
	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	/* last: LSM blob allocation is the only step here that can fail */
	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);
2392cb1599fSDavid Chinner
/*
 * Return an inode to the generic inode slab cache.  Only correct for
 * inodes that were allocated from inode_cachep (no ->alloc_inode).
 */
void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);
245fdb0da89SAl Viro
/*
 * RCU callback that actually frees an inode after the grace period:
 * use the filesystem's ->free_inode if one was recorded, otherwise
 * release to the generic slab cache.
 */
static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	if (inode->free_inode)
		inode->free_inode(inode);
	else
		free_inode_nonrcu(inode);
}
254fdb0da89SAl Viro
/*
 * Allocate and initialise an inode for @sb, preferring the filesystem's
 * ->alloc_inode over the generic slab cache.  Returns NULL on failure.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		/*
		 * Init failed: tear down.  If ->destroy_inode exists and
		 * there is no ->free_inode, destroy_inode did the freeing
		 * itself and we are done; otherwise free via the RCU-free
		 * path immediately (no grace period needed, the inode was
		 * never visible).
		 */
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}
2811da177e4SLinus Torvalds
/*
 * Common teardown for an inode being destroyed: release security, fsnotify,
 * file-lock and writeback state, drop the superblock's pending-remove count
 * for unlinked inodes, and release any cached POSIX ACLs.  Does NOT free
 * the inode memory itself.
 */
void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	if (!inode->i_nlink) {
		/* balances the inc done when i_nlink hit zero */
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
3032e00c97eSChristoph Hellwig
/*
 * Fully destroy an inode: common teardown, then either the filesystem's
 * ->destroy_inode (which may free synchronously when no ->free_inode is
 * provided) or the RCU-deferred free via i_callback().
 */
static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		/* without ->free_inode, ->destroy_inode freed it already */
		if (!ops->free_inode)
			return;
	}
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}
3181da177e4SLinus Torvalds
3197ada4db8SMiklos Szeredi /**
3207ada4db8SMiklos Szeredi * drop_nlink - directly drop an inode's link count
3217ada4db8SMiklos Szeredi * @inode: inode
3227ada4db8SMiklos Szeredi *
3237ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3247ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. In cases
3257ada4db8SMiklos Szeredi * where we are attempting to track writes to the
3267ada4db8SMiklos Szeredi * filesystem, a decrement to zero means an imminent
3277ada4db8SMiklos Szeredi * write when the file is truncated and actually unlinked
3287ada4db8SMiklos Szeredi * on the filesystem.
3297ada4db8SMiklos Szeredi */
drop_nlink(struct inode * inode)3307ada4db8SMiklos Szeredi void drop_nlink(struct inode *inode)
3317ada4db8SMiklos Szeredi {
3327ada4db8SMiklos Szeredi WARN_ON(inode->i_nlink == 0);
3337ada4db8SMiklos Szeredi inode->__i_nlink--;
3347ada4db8SMiklos Szeredi if (!inode->i_nlink)
3357ada4db8SMiklos Szeredi atomic_long_inc(&inode->i_sb->s_remove_count);
3367ada4db8SMiklos Szeredi }
3377ada4db8SMiklos Szeredi EXPORT_SYMBOL(drop_nlink);
3387ada4db8SMiklos Szeredi
3397ada4db8SMiklos Szeredi /**
3407ada4db8SMiklos Szeredi * clear_nlink - directly zero an inode's link count
3417ada4db8SMiklos Szeredi * @inode: inode
3427ada4db8SMiklos Szeredi *
3437ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3447ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. See
3457ada4db8SMiklos Szeredi * drop_nlink() for why we care about i_nlink hitting zero.
3467ada4db8SMiklos Szeredi */
clear_nlink(struct inode * inode)3477ada4db8SMiklos Szeredi void clear_nlink(struct inode *inode)
3487ada4db8SMiklos Szeredi {
3497ada4db8SMiklos Szeredi if (inode->i_nlink) {
3507ada4db8SMiklos Szeredi inode->__i_nlink = 0;
3517ada4db8SMiklos Szeredi atomic_long_inc(&inode->i_sb->s_remove_count);
3527ada4db8SMiklos Szeredi }
3537ada4db8SMiklos Szeredi }
3547ada4db8SMiklos Szeredi EXPORT_SYMBOL(clear_nlink);
3557ada4db8SMiklos Szeredi
3567ada4db8SMiklos Szeredi /**
3577ada4db8SMiklos Szeredi * set_nlink - directly set an inode's link count
3587ada4db8SMiklos Szeredi * @inode: inode
3597ada4db8SMiklos Szeredi * @nlink: new nlink (should be non-zero)
3607ada4db8SMiklos Szeredi *
3617ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3627ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink.
3637ada4db8SMiklos Szeredi */
set_nlink(struct inode * inode,unsigned int nlink)3647ada4db8SMiklos Szeredi void set_nlink(struct inode *inode, unsigned int nlink)
3657ada4db8SMiklos Szeredi {
3667ada4db8SMiklos Szeredi if (!nlink) {
3677ada4db8SMiklos Szeredi clear_nlink(inode);
3687ada4db8SMiklos Szeredi } else {
3697ada4db8SMiklos Szeredi /* Yes, some filesystems do change nlink from zero to one */
3707ada4db8SMiklos Szeredi if (inode->i_nlink == 0)
3717ada4db8SMiklos Szeredi atomic_long_dec(&inode->i_sb->s_remove_count);
3727ada4db8SMiklos Szeredi
3737ada4db8SMiklos Szeredi inode->__i_nlink = nlink;
3747ada4db8SMiklos Szeredi }
3757ada4db8SMiklos Szeredi }
3767ada4db8SMiklos Szeredi EXPORT_SYMBOL(set_nlink);
3777ada4db8SMiklos Szeredi
3787ada4db8SMiklos Szeredi /**
3797ada4db8SMiklos Szeredi * inc_nlink - directly increment an inode's link count
3807ada4db8SMiklos Szeredi * @inode: inode
3817ada4db8SMiklos Szeredi *
3827ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3837ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. Currently,
3847ada4db8SMiklos Szeredi * it is only here for parity with dec_nlink().
3857ada4db8SMiklos Szeredi */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		/* only O_TMPFILE-style inodes may legally go 0 -> 1 */
		WARN_ON(!(inode->i_state & I_LINKABLE));
		/* leaving the unlinked state: undo the remove accounting */
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);
3967ada4db8SMiklos Szeredi
/*
 * Initialise the fields of an address_space that carry non-zero initial
 * state; callers are responsible for zeroing the rest (see
 * address_space_init_once()).
 */
static void __address_space_init_once(struct address_space *mapping)
{
	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
	init_rwsem(&mapping->i_mmap_rwsem);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	mapping->i_mmap = RB_ROOT_CACHED;
}
405ae23395dSDave Chinner
/* Zero a standalone address_space, then set up its non-zero fields. */
void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);
4122aa15890SMiklos Szeredi
4131da177e4SLinus Torvalds /*
4141da177e4SLinus Torvalds * These are initializations that only need to be done
4151da177e4SLinus Torvalds * once, because the fields are idempotent across use
4161da177e4SLinus Torvalds * of the inode, so let the slab aware of that.
4171da177e4SLinus Torvalds */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_LIST_HEAD(&inode->i_sb_list);
	__address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
4311da177e4SLinus Torvalds
/* Slab constructor for inode_cachep: thin void* adapter around
 * inode_init_once(). */
static void init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
4381da177e4SLinus Torvalds
4391da177e4SLinus Torvalds /*
440250df6edSDave Chinner * inode->i_lock must be held
4411da177e4SLinus Torvalds */
/* Take a new reference on @inode; caller must hold inode->i_lock. */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}
4461da177e4SLinus Torvalds
4477de9c6eeSAl Viro /*
4487de9c6eeSAl Viro * get additional reference to inode; caller must already hold one.
4497de9c6eeSAl Viro */
void ihold(struct inode *inode)
{
	/* result < 2 means the caller did not already hold a reference */
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);
4557de9c6eeSAl Viro
/*
 * Put @inode on its superblock's LRU if it is eligible: clean, not under
 * sync/teardown, unreferenced, on an active superblock, and with a
 * shrinkable mapping.  If it is already on the LRU and @rotate is set,
 * mark it referenced so the shrinker gives it another pass instead.
 */
static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
		return;
	if (atomic_read(&inode->i_count))
		return;
	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		return;
	if (!mapping_shrinkable(&inode->i_data))
		return;

	/* list_lru_add() returns true only if it was not already queued */
	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}
4729e38d86fSNick Piggin
4734eff96ddSJan Kara /*
4744eff96ddSJan Kara * Add inode to LRU if needed (inode is unused and clean).
4754eff96ddSJan Kara *
4764eff96ddSJan Kara * Needs inode->i_lock held.
4774eff96ddSJan Kara */
void inode_add_lru(struct inode *inode)
{
	/* no rotation: newly-unused inodes go in without I_REFERENCED */
	__inode_add_lru(inode, false);
}
4824eff96ddSJan Kara
/* Remove @inode from its superblock's LRU, adjusting the unused count. */
static void inode_lru_list_del(struct inode *inode)
{
	/* list_lru_del() returns true only if it was actually queued */
	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}
4881da177e4SLinus Torvalds
/*
 * Mark @inode as being isolated from the LRU so eviction waits for the
 * isolation to finish (see inode_wait_for_lru_isolating()).
 * Caller holds inode->i_lock.
 */
static void inode_pin_lru_isolating(struct inode *inode)
{
	lockdep_assert_held(&inode->i_lock);
	WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
	inode->i_state |= I_LRU_ISOLATING;
}
495b9bda5f6SZhihao Cheng
/*
 * Clear I_LRU_ISOLATING and wake anyone waiting in
 * inode_wait_for_lru_isolating().
 */
static void inode_unpin_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
	inode->i_state &= ~I_LRU_ISOLATING;
	/* order the flag clear before the waitqueue check in wake_up_bit() */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
	spin_unlock(&inode->i_lock);
}
505b9bda5f6SZhihao Cheng
/*
 * Block until any in-flight LRU isolation of @inode has finished
 * (I_LRU_ISOLATING cleared).  The lock is dropped while sleeping and
 * retaken to re-check the state afterwards.
 */
static void inode_wait_for_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	if (inode->i_state & I_LRU_ISOLATING) {
		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
		wait_queue_head_t *wqh;

		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
		/* pin cannot recur once we are tearing the inode down */
		WARN_ON(inode->i_state & I_LRU_ISOLATING);
	}
	spin_unlock(&inode->i_lock);
}
521b9bda5f6SZhihao Cheng
522646ec461SChristoph Hellwig /**
523646ec461SChristoph Hellwig * inode_sb_list_add - add inode to the superblock list of inodes
524646ec461SChristoph Hellwig * @inode: inode to add
525646ec461SChristoph Hellwig */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);
533646ec461SChristoph Hellwig
/*
 * Take @inode off its superblock's inode list, if it is on one.  The
 * unlocked emptiness check is safe: an inode being destroyed can no
 * longer be added to the list concurrently.
 */
static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}
542646ec461SChristoph Hellwig
/*
 * Hash (@sb, @hashval) into an inode_hashtable bucket index.  The
 * superblock pointer is mixed in so equal inode numbers on different
 * filesystems land in different chains.
 */
static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
	return tmp & i_hash_mask;
}
5524c51acbcSDave Chinner
5534c51acbcSDave Chinner /**
5544c51acbcSDave Chinner * __insert_inode_hash - hash an inode
5554c51acbcSDave Chinner * @inode: unhashed inode
5564c51acbcSDave Chinner * @hashval: unsigned long value used to locate this object in the
5574c51acbcSDave Chinner * inode_hashtable.
5584c51acbcSDave Chinner *
5594c51acbcSDave Chinner * Add an inode to the inode hash for this superblock.
5604c51acbcSDave Chinner */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	/* i_lock nests inside inode_hash_lock (see locking rules above) */
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_add_head_rcu(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
5724c51acbcSDave Chinner
5734c51acbcSDave Chinner /**
574f2ee7abfSEric Dumazet * __remove_inode_hash - remove an inode from the hash
5754c51acbcSDave Chinner * @inode: inode to unhash
5764c51acbcSDave Chinner *
5774c51acbcSDave Chinner * Remove an inode from the superblock.
5784c51acbcSDave Chinner */
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	/* RCU-safe unhash; concurrent RCU walkers may still see the node */
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);
5884c51acbcSDave Chinner
/*
 * Best-effort debug dump of a possibly-corrupt address_space.  Every
 * dereference goes through get_kernel_nofault()/strncpy_from_kernel_nofault()
 * so a bogus pointer produces a warning line rather than a crash.
 */
void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;
	char fname[64] = {};
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything depending on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr)) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
		strscpy(fname, "<invalid>", 63);
	/*
	 * Even if strncpy_from_kernel_nofault() succeeded,
	 * the fname could be unreliable
	 */
	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
			a_ops, ino, fname);
}
6413e9d80a8SMatthew Wilcox (Oracle)
/*
 * clear_inode - mark an empty inode I_CLEAR
 * @inode: inode being torn down
 *
 * Final teardown step: checks that the page cache, buffer list and
 * writeback list are empty, then moves i_state from I_FREEING to
 * I_FREEING | I_CLEAR.  Called from evict() when the filesystem has no
 * ->evict_inode(); also exported for filesystems to call themselves.
 */
void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __filemap_remove_folio())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * two known and long-standing ways in which nodes may get left behind
	 * (when deep radix-tree node allocation failed partway; or when THP
	 * collapse_file() failed). Until those two known cases are cleaned up,
	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
	 * nor even WARN_ON(!mapping_empty).
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
668b0683aa6SAl Viro
669b2b2af8eSDave Chinner /*
670b2b2af8eSDave Chinner * Free the inode passed in, removing it from the lists it is still connected
671b2b2af8eSDave Chinner * to. We remove any pages still attached to the inode and wait for any IO that
672b2b2af8eSDave Chinner * is still in progress before finally destroying the inode.
673b2b2af8eSDave Chinner *
674b2b2af8eSDave Chinner * An inode must already be marked I_FREEING so that we avoid the inode being
675b2b2af8eSDave Chinner * moved back onto lists if we race with other code that manipulates the lists
676b2b2af8eSDave Chinner * (e.g. writeback_single_inode). The caller is responsible for setting this.
677b2b2af8eSDave Chinner *
678b2b2af8eSDave Chinner * An inode must already be removed from the LRU list before being evicted from
679b2b2af8eSDave Chinner * the cache. This should occur atomically with setting the I_FREEING state
680b2b2af8eSDave Chinner * flag, so no inodes here should ever be on the LRU when being evicted.
681b2b2af8eSDave Chinner */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/* Wait for any LRU walker that pinned us (see inode_lru_isolate()). */
	inode_wait_for_lru_isolating(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode. We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	/* Filesystem teardown if provided, otherwise the generic variant. */
	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/* Wake anyone sleeping on __I_NEW (e.g. __wait_on_freeing_inode()). */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
722b4272d4cSAl Viro
7231da177e4SLinus Torvalds /*
7241da177e4SLinus Torvalds * dispose_list - dispose of the contents of a local list
7251da177e4SLinus Torvalds * @head: the head of the list to free
7261da177e4SLinus Torvalds *
7271da177e4SLinus Torvalds * Dispose-list gets a local list with local inodes in it, so it doesn't
7281da177e4SLinus Torvalds * need to worry about list corruption and SMP locks.
7291da177e4SLinus Torvalds */
dispose_list(struct list_head * head)7301da177e4SLinus Torvalds static void dispose_list(struct list_head *head)
7311da177e4SLinus Torvalds {
7321da177e4SLinus Torvalds while (!list_empty(head)) {
7331da177e4SLinus Torvalds struct inode *inode;
7341da177e4SLinus Torvalds
7357ccf19a8SNick Piggin inode = list_first_entry(head, struct inode, i_lru);
7367ccf19a8SNick Piggin list_del_init(&inode->i_lru);
7371da177e4SLinus Torvalds
738644da596SAl Viro evict(inode);
739ac05fbb4SJosef Bacik cond_resched();
7401da177e4SLinus Torvalds }
7411da177e4SLinus Torvalds }
7421da177e4SLinus Torvalds
7431da177e4SLinus Torvalds /**
74463997e98SAl Viro * evict_inodes - evict all evictable inodes for a superblock
74563997e98SAl Viro * @sb: superblock to operate on
7461da177e4SLinus Torvalds *
74763997e98SAl Viro * Make sure that no inodes with zero refcount are retained. This is
7481751e8a6SLinus Torvalds * called by superblock shutdown after having SB_ACTIVE flag removed,
74963997e98SAl Viro * so any inode reaching zero refcount during or after that call will
75063997e98SAl Viro * be immediately evicted.
75163997e98SAl Viro */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		/* Cheap unlocked check first; re-checked under i_lock below. */
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);
79563997e98SAl Viro
79663997e98SAl Viro /**
797a0318786SChristoph Hellwig * invalidate_inodes - attempt to free all inodes on a superblock
798a0318786SChristoph Hellwig * @sb: superblock to operate on
799a0318786SChristoph Hellwig *
800e127b9bcSChristoph Hellwig * Attempts to free all inodes (including dirty inodes) for a given superblock.
8011da177e4SLinus Torvalds */
void invalidate_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Still referenced: cannot be invalidated, skip it. */
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		/* Periodically drop the sb list lock so we don't livelock. */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
8351da177e4SLinus Torvalds
8361da177e4SLinus Torvalds /*
837bc3b14cbSDave Chinner * Isolate the inode from the LRU in preparation for freeing it.
8381da177e4SLinus Torvalds *
8399e38d86fSNick Piggin * If the inode has the I_REFERENCED flag set, then it means that it has been
8409e38d86fSNick Piggin * used recently - the flag is set in iput_final(). When we encounter such an
8419e38d86fSNick Piggin * inode, clear the flag and move it to the back of the LRU so it gets another
8429e38d86fSNick Piggin * pass through the LRU before it gets reclaimed. This is necessary because of
8439e38d86fSNick Piggin * the fact we are doing lazy LRU updates to minimise lock contention so the
8449e38d86fSNick Piggin * LRU does not have strict ordering. Hence we don't want to reclaim inodes
8459e38d86fSNick Piggin * with this flag set because they are the inodes that are out of order.
8461da177e4SLinus Torvalds */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct inode *inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * On highmem systems, mapping_shrinkable() permits dropping
	 * page cache in order to free up struct inodes: lowmem might
	 * be under pressure before the cache inside the highmem zone.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		/*
		 * Pin the inode so eviction waits for us (see
		 * inode_wait_for_lru_isolating() in evict()) while we drop
		 * both locks to reclaim its buffers and page cache.
		 */
		inode_pin_lru_isolating(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			mm_account_reclaimed_pages(reap);
		}
		inode_unpin_lru_isolating(inode);
		spin_lock(lru_lock);
		/* We dropped the locks; tell list_lru to retry this entry. */
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}
913bc3b14cbSDave Chinner
914bc3b14cbSDave Chinner /*
915bc3b14cbSDave Chinner * Walk the superblock inode LRU for freeable inodes and attempt to free them.
916bc3b14cbSDave Chinner * This is called from the superblock shrinker function with a number of inodes
917bc3b14cbSDave Chinner * to trim from the LRU. Inodes to be freed are moved to a temporary list and
918bc3b14cbSDave Chinner * then are freed outside inode_lock by dispose_list().
919bc3b14cbSDave Chinner */
prune_icache_sb(struct super_block * sb,struct shrink_control * sc)920503c358cSVladimir Davydov long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
921bc3b14cbSDave Chinner {
922bc3b14cbSDave Chinner LIST_HEAD(freeable);
923bc3b14cbSDave Chinner long freed;
924bc3b14cbSDave Chinner
925503c358cSVladimir Davydov freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
926503c358cSVladimir Davydov inode_lru_isolate, &freeable);
9271da177e4SLinus Torvalds dispose_list(&freeable);
9280a234c6dSDave Chinner return freed;
9291da177e4SLinus Torvalds }
9301da177e4SLinus Torvalds
9311da177e4SLinus Torvalds static void __wait_on_freeing_inode(struct inode *inode);
9321da177e4SLinus Torvalds /*
9331da177e4SLinus Torvalds * Called with the inode lock held.
9341da177e4SLinus Torvalds */
/*
 * find_inode - search one hash chain for a matching inode
 * @sb:   superblock the inode must belong to
 * @head: hash chain to search
 * @test: filesystem comparison callback
 * @data: opaque argument passed to @test
 *
 * Returns the inode with an extra reference taken, NULL if no match, or
 * ERR_PTR(-ESTALE) if the match is still being created.  If a match is
 * being freed, waits for it to go away and rescans the chain.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is on its way out; wait, then rescan. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9631da177e4SLinus Torvalds
9641da177e4SLinus Torvalds /*
9651da177e4SLinus Torvalds * find_inode_fast is the fast path version of find_inode, see the comment at
9661da177e4SLinus Torvalds * iget_locked for details.
9671da177e4SLinus Torvalds */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		/* Match on inode number and superblock, no ->test callback. */
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is on its way out; wait, then rescan. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9941da177e4SLinus Torvalds
995f991bd2eSEric Dumazet /*
996f991bd2eSEric Dumazet * Each cpu owns a range of LAST_INO_BATCH numbers.
997f991bd2eSEric Dumazet * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
998f991bd2eSEric Dumazet * to renew the exhausted range.
9998290c35fSDavid Chinner *
1000f991bd2eSEric Dumazet * This does not significantly increase overflow rate because every CPU can
1001f991bd2eSEric Dumazet * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
1002f991bd2eSEric Dumazet * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
1003f991bd2eSEric Dumazet * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
1004f991bd2eSEric Dumazet * overflow rate by 2x, which does not seem too significant.
1005f991bd2eSEric Dumazet *
1006f991bd2eSEric Dumazet * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1007f991bd2eSEric Dumazet * error if st_ino won't fit in target struct field. Use 32bit counter
1008f991bd2eSEric Dumazet * here to attempt to avoid that.
10098290c35fSDavid Chinner */
1010f991bd2eSEric Dumazet #define LAST_INO_BATCH 1024
1011f991bd2eSEric Dumazet static DEFINE_PER_CPU(unsigned int, last_ino);
10128290c35fSDavid Chinner
unsigned int get_next_ino(void)
{
	/* get_cpu_var() disables preemption until put_cpu_var() below. */
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	/* Per-cpu batch exhausted: grab a fresh LAST_INO_BATCH-sized range. */
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	res++;
	/* get_next_ino should not provide a 0 inode number */
	if (unlikely(!res))
		res++;
	*p = res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);
10368290c35fSDavid Chinner
10371da177e4SLinus Torvalds /**
1038a209dfc7SEric Dumazet * new_inode_pseudo - obtain an inode
1039a209dfc7SEric Dumazet * @sb: superblock
1040a209dfc7SEric Dumazet *
1041a209dfc7SEric Dumazet * Allocates a new inode for given superblock.
1042a209dfc7SEric Dumazet * Inode wont be chained in superblock s_inodes list
1043a209dfc7SEric Dumazet * This means :
1044a209dfc7SEric Dumazet * - fs can't be unmount
1045a209dfc7SEric Dumazet * - quotas, fsnotify, writeback can't work
1046a209dfc7SEric Dumazet */
new_inode_pseudo(struct super_block * sb)1047a209dfc7SEric Dumazet struct inode *new_inode_pseudo(struct super_block *sb)
1048a209dfc7SEric Dumazet {
1049a209dfc7SEric Dumazet struct inode *inode = alloc_inode(sb);
1050a209dfc7SEric Dumazet
1051a209dfc7SEric Dumazet if (inode) {
1052a209dfc7SEric Dumazet spin_lock(&inode->i_lock);
1053a209dfc7SEric Dumazet inode->i_state = 0;
1054a209dfc7SEric Dumazet spin_unlock(&inode->i_lock);
1055a209dfc7SEric Dumazet }
1056a209dfc7SEric Dumazet return inode;
1057a209dfc7SEric Dumazet }
1058a209dfc7SEric Dumazet
1059a209dfc7SEric Dumazet /**
10601da177e4SLinus Torvalds * new_inode - obtain an inode
10611da177e4SLinus Torvalds * @sb: superblock
10621da177e4SLinus Torvalds *
1063769848c0SMel Gorman * Allocates a new inode for given superblock. The default gfp_mask
10643c1d4378SHugh Dickins * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
1065769848c0SMel Gorman * If HIGHMEM pages are unsuitable or it is known that pages allocated
1066769848c0SMel Gorman * for the page cache are not reclaimable or migratable,
1067769848c0SMel Gorman * mapping_set_gfp_mask() must be called with suitable flags on the
1068769848c0SMel Gorman * newly created inode's mapping
1069769848c0SMel Gorman *
10701da177e4SLinus Torvalds */
struct inode *new_inode(struct super_block *sb)
{
	/* Like new_inode_pseudo(), but also hooked into sb->s_inodes. */
	struct inode *inode = new_inode_pseudo(sb);

	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);
10811da177e4SLinus Torvalds
108214358e6dSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_annotate_inode_mutex_key(struct inode * inode)1083e096d0c7SJosh Boyer void lockdep_annotate_inode_mutex_key(struct inode *inode)
1084e096d0c7SJosh Boyer {
1085a3314a0eSNamhyung Kim if (S_ISDIR(inode->i_mode)) {
108614358e6dSPeter Zijlstra struct file_system_type *type = inode->i_sb->s_type;
10871e89a5e1SPeter Zijlstra
10889a7aa12fSJan Kara /* Set new key only if filesystem hasn't already changed it */
10899902af79SAl Viro if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
109014358e6dSPeter Zijlstra /*
109114358e6dSPeter Zijlstra * ensure nobody is actually holding i_mutex
109214358e6dSPeter Zijlstra */
10939902af79SAl Viro // mutex_destroy(&inode->i_mutex);
10949902af79SAl Viro init_rwsem(&inode->i_rwsem);
10959902af79SAl Viro lockdep_set_class(&inode->i_rwsem,
10969a7aa12fSJan Kara &type->i_mutex_dir_key);
10979a7aa12fSJan Kara }
10981e89a5e1SPeter Zijlstra }
1099e096d0c7SJosh Boyer }
1100e096d0c7SJosh Boyer EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
110114358e6dSPeter Zijlstra #endif
1102e096d0c7SJosh Boyer
1103e096d0c7SJosh Boyer /**
1104e096d0c7SJosh Boyer * unlock_new_inode - clear the I_NEW state and wake up any waiters
1105e096d0c7SJosh Boyer * @inode: new inode to unlock
1106e096d0c7SJosh Boyer *
1107e096d0c7SJosh Boyer * Called when the inode is fully initialised to clear the new state of the
1108e096d0c7SJosh Boyer * inode and wake up anyone waiting for the inode to finish initialisation.
1109e096d0c7SJosh Boyer */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	/* ~I_NEW & ~I_CREATING == ~(I_NEW | I_CREATING): clears both flags */
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/* Order the i_state update before waking any __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
11211da177e4SLinus Torvalds
/*
 * discard_new_inode - drop a newly allocated inode that won't be used
 * @inode: inode with I_NEW still set
 *
 * Like unlock_new_inode(), but also drops the creator's reference via
 * iput().  Note that unlike unlock_new_inode(), I_CREATING is left set,
 * so concurrent hash lookups keep returning -ESTALE (see find_inode()).
 */
void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/* Order the i_state update before waking any __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
1134c2b6d621SAl Viro
11350b2d0724SChristoph Hellwig /**
1136f23ce757SJan Kara * lock_two_inodes - lock two inodes (may be regular files but also dirs)
1137f23ce757SJan Kara *
1138f23ce757SJan Kara * Lock any non-NULL argument. The caller must make sure that if he is passing
1139f23ce757SJan Kara * in two directories, one is not ancestor of the other. Zero, one or two
1140f23ce757SJan Kara * objects may be locked by this function.
1141f23ce757SJan Kara *
1142f23ce757SJan Kara * @inode1: first inode to lock
1143f23ce757SJan Kara * @inode2: second inode to lock
1144f23ce757SJan Kara * @subclass1: inode lock subclass for the first lock obtained
1145f23ce757SJan Kara * @subclass2: inode lock subclass for the second lock obtained
1146f23ce757SJan Kara */
void lock_two_inodes(struct inode *inode1, struct inode *inode2,
		     unsigned subclass1, unsigned subclass2)
{
	if (!inode1 || !inode2) {
		/*
		 * Make sure @subclass1 will be used for the acquired lock.
		 * This is not strictly necessary (no current caller cares) but
		 * let's keep things consistent.
		 */
		if (!inode1)
			swap(inode1, inode2);
		goto lock;
	}

	/*
	 * If one object is directory and the other is not, we must make sure
	 * to lock directory first as the other object may be its child.
	 */
	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
		/* Same kind: order by address to avoid ABBA deadlocks. */
		if (inode1 > inode2)
			swap(inode1, inode2);
	} else if (!S_ISDIR(inode1->i_mode))
		swap(inode1, inode2);
lock:
	if (inode1)
		inode_lock_nested(inode1, subclass1);
	if (inode2 && inode2 != inode1)
		inode_lock_nested(inode2, subclass2);
}
1176f23ce757SJan Kara
1177f23ce757SJan Kara /**
1178375e289eSJ. Bruce Fields * lock_two_nondirectories - take two i_mutexes on non-directory objects
11794fd699aeSJ. Bruce Fields *
11802454ad83SJan Kara * Lock any non-NULL argument. Passed objects must not be directories.
11814fd699aeSJ. Bruce Fields * Zero, one or two objects may be locked by this function.
11824fd699aeSJ. Bruce Fields *
1183375e289eSJ. Bruce Fields * @inode1: first inode to lock
1184375e289eSJ. Bruce Fields * @inode2: second inode to lock
1185375e289eSJ. Bruce Fields */
lock_two_nondirectories(struct inode * inode1,struct inode * inode2)1186375e289eSJ. Bruce Fields void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
1187375e289eSJ. Bruce Fields {
118833ab231fSChristian Brauner if (inode1)
11892454ad83SJan Kara WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
119033ab231fSChristian Brauner if (inode2)
11912454ad83SJan Kara WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
11922454ad83SJan Kara lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
1193375e289eSJ. Bruce Fields }
1194375e289eSJ. Bruce Fields EXPORT_SYMBOL(lock_two_nondirectories);
1195375e289eSJ. Bruce Fields
/**
 * unlock_two_nondirectories - release locks from lock_two_nondirectories()
 * @inode1: first inode to unlock
 * @inode2: second inode to unlock
 */
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	if (inode1) {
		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
		inode_unlock(inode1);
	}
	/*
	 * lock_two_nondirectories() takes a single lock when both arguments
	 * alias the same inode, so only unlock inode2 if it is distinct.
	 */
	if (inode2 && inode2 != inode1) {
		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
		inode_unlock(inode2);
	}
}
EXPORT_SYMBOL(unlock_two_nondirectories);
1213375e289eSJ. Bruce Fields
/**
 * inode_insert5 - obtain an inode from a mounted file system
 * @inode: pre-allocated inode to use for insert to cache
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count. This is
 * a variant of iget5_locked() for callers that don't want to fail on memory
 * allocation of inode.
 *
 * If the inode is not in cache, insert the pre-allocated inode to cache and
 * return it locked, hashed, and with the I_NEW flag set. The file system gets
 * to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so can't
 * sleep.
 */
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
			    int (*test)(struct inode *, void *),
			    int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	struct inode *old;

again:
	spin_lock(&inode_hash_lock);
	old = find_inode(inode->i_sb, head, test, data);
	if (unlikely(old)) {
		/*
		 * Uhhuh, somebody else created the same inode under us.
		 * Use the old inode instead of the preallocated one.
		 */
		spin_unlock(&inode_hash_lock);
		/* find_inode() can return an ERR_PTR; fail the insert. */
		if (IS_ERR(old))
			return NULL;
		/* Wait for I_NEW to clear before handing the inode out. */
		wait_on_inode(old);
		if (unlikely(inode_unhashed(old))) {
			/* Raced with eviction: drop it and look up again. */
			iput(old);
			goto again;
		}
		return old;
	}

	/* @set failed: leave @inode unhashed; caller disposes of it. */
	if (set && unlikely(set(inode, data))) {
		inode = NULL;
		goto unlock;
	}

	/*
	 * Return the locked inode with I_NEW set, the
	 * caller is responsible for filling in the contents
	 */
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head_rcu(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);

	/*
	 * Add inode to the sb list if it's not already. It has I_NEW at this
	 * point, so it should be safe to test i_sb_list locklessly.
	 */
	if (list_empty(&inode->i_sb_list))
		inode_sb_list_add(inode);
unlock:
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(inode_insert5);
128680ea09a0SMiklos Szeredi
128780ea09a0SMiklos Szeredi /**
12880b2d0724SChristoph Hellwig * iget5_locked - obtain an inode from a mounted file system
12890b2d0724SChristoph Hellwig * @sb: super block of file system
12900b2d0724SChristoph Hellwig * @hashval: hash value (usually inode number) to get
12910b2d0724SChristoph Hellwig * @test: callback used for comparisons between inodes
12920b2d0724SChristoph Hellwig * @set: callback used to initialize a new struct inode
12930b2d0724SChristoph Hellwig * @data: opaque data pointer to pass to @test and @set
12941da177e4SLinus Torvalds *
12950b2d0724SChristoph Hellwig * Search for the inode specified by @hashval and @data in the inode cache,
12960b2d0724SChristoph Hellwig * and if present it is return it with an increased reference count. This is
12970b2d0724SChristoph Hellwig * a generalized version of iget_locked() for file systems where the inode
12980b2d0724SChristoph Hellwig * number is not sufficient for unique identification of an inode.
12990b2d0724SChristoph Hellwig *
13000b2d0724SChristoph Hellwig * If the inode is not in cache, allocate a new inode and return it locked,
13010b2d0724SChristoph Hellwig * hashed, and with the I_NEW flag set. The file system gets to fill it in
13020b2d0724SChristoph Hellwig * before unlocking it via unlock_new_inode().
13030b2d0724SChristoph Hellwig *
13040b2d0724SChristoph Hellwig * Note both @test and @set are called with the inode_hash_lock held, so can't
13050b2d0724SChristoph Hellwig * sleep.
13061da177e4SLinus Torvalds */
iget5_locked(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),int (* set)(struct inode *,void *),void * data)13070b2d0724SChristoph Hellwig struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
13086b3304b5SManish Katiyar int (*test)(struct inode *, void *),
13090b2d0724SChristoph Hellwig int (*set)(struct inode *, void *), void *data)
13101da177e4SLinus Torvalds {
131180ea09a0SMiklos Szeredi struct inode *inode = ilookup5(sb, hashval, test, data);
13120b2d0724SChristoph Hellwig
131380ea09a0SMiklos Szeredi if (!inode) {
1314e950564bSMiklos Szeredi struct inode *new = alloc_inode(sb);
13150b2d0724SChristoph Hellwig
131680ea09a0SMiklos Szeredi if (new) {
1317e950564bSMiklos Szeredi new->i_state = 0;
131880ea09a0SMiklos Szeredi inode = inode_insert5(new, hashval, test, set, data);
131980ea09a0SMiklos Szeredi if (unlikely(inode != new))
1320e950564bSMiklos Szeredi destroy_inode(new);
13212864f301SAl Viro }
13221da177e4SLinus Torvalds }
13231da177e4SLinus Torvalds return inode;
13241da177e4SLinus Torvalds }
13250b2d0724SChristoph Hellwig EXPORT_SYMBOL(iget5_locked);
13261da177e4SLinus Torvalds
/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count. This is for file systems
 * where the inode number is sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set. The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;
again:
	/* Fast path: the inode may already be cached. */
	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);
	if (inode) {
		/* An ERR_PTR from the lookup means the inode is unusable. */
		if (IS_ERR(inode))
			return NULL;
		/* Wait for I_NEW to clear before handing the inode out. */
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			/* Raced with eviction: drop it and look up again. */
			iput(inode);
			goto again;
		}
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		if (IS_ERR(old))
			return NULL;
		inode = old;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget_locked);
14001da177e4SLinus Torvalds
1401ad5e195aSChristoph Hellwig /*
1402ad5e195aSChristoph Hellwig * search the inode cache for a matching inode number.
1403ad5e195aSChristoph Hellwig * If we find one, then the inode number we are trying to
1404ad5e195aSChristoph Hellwig * allocate is not unique and so we should not use it.
1405ad5e195aSChristoph Hellwig *
1406ad5e195aSChristoph Hellwig * Returns 1 if the inode number is unique, 0 if it is not.
1407ad5e195aSChristoph Hellwig */
test_inode_iunique(struct super_block * sb,unsigned long ino)1408ad5e195aSChristoph Hellwig static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1409ad5e195aSChristoph Hellwig {
1410ad5e195aSChristoph Hellwig struct hlist_head *b = inode_hashtable + hash(sb, ino);
1411ad5e195aSChristoph Hellwig struct inode *inode;
1412ad5e195aSChristoph Hellwig
14133f19b2abSDavid Howells hlist_for_each_entry_rcu(inode, b, i_hash) {
14143f19b2abSDavid Howells if (inode->i_ino == ino && inode->i_sb == sb)
1415ad5e195aSChristoph Hellwig return 0;
1416ad5e195aSChristoph Hellwig }
1417ad5e195aSChristoph Hellwig return 1;
1418ad5e195aSChristoph Hellwig }
1419ad5e195aSChristoph Hellwig
/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock. This is used by file systems that have no natural
 * permanent inode numbering system. An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static DEFINE_SPINLOCK(iunique_lock);	/* serializes the counter */
	static unsigned int counter;		/* shared across all superblocks */
	ino_t res;

	/* RCU protects the hash-bucket walk done by test_inode_iunique(). */
	rcu_read_lock();
	spin_lock(&iunique_lock);
	do {
		/* Counter wrapped or first use: skip the reserved range. */
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
	} while (!test_inode_iunique(sb, res));
	spin_unlock(&iunique_lock);
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(iunique);
14581da177e4SLinus Torvalds
igrab(struct inode * inode)14591da177e4SLinus Torvalds struct inode *igrab(struct inode *inode)
14601da177e4SLinus Torvalds {
1461250df6edSDave Chinner spin_lock(&inode->i_lock);
1462250df6edSDave Chinner if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
14631da177e4SLinus Torvalds __iget(inode);
1464250df6edSDave Chinner spin_unlock(&inode->i_lock);
1465250df6edSDave Chinner } else {
1466250df6edSDave Chinner spin_unlock(&inode->i_lock);
14671da177e4SLinus Torvalds /*
14681da177e4SLinus Torvalds * Handle the case where s_op->clear_inode is not been
14691da177e4SLinus Torvalds * called yet, and somebody is calling igrab
14701da177e4SLinus Torvalds * while the inode is getting freed.
14711da177e4SLinus Torvalds */
14721da177e4SLinus Torvalds inode = NULL;
1473250df6edSDave Chinner }
14741da177e4SLinus Torvalds return inode;
14751da177e4SLinus Torvalds }
14761da177e4SLinus Torvalds EXPORT_SYMBOL(igrab);
14771da177e4SLinus Torvalds
14781da177e4SLinus Torvalds /**
147988bd5121SAnton Altaparmakov * ilookup5_nowait - search for an inode in the inode cache
14801da177e4SLinus Torvalds * @sb: super block of file system to search
14811da177e4SLinus Torvalds * @hashval: hash value (usually inode number) to search for
14821da177e4SLinus Torvalds * @test: callback used for comparisons between inodes
14831da177e4SLinus Torvalds * @data: opaque data pointer to pass to @test
14841da177e4SLinus Torvalds *
14850b2d0724SChristoph Hellwig * Search for the inode specified by @hashval and @data in the inode cache.
14861da177e4SLinus Torvalds * If the inode is in the cache, the inode is returned with an incremented
14870b2d0724SChristoph Hellwig * reference count.
148888bd5121SAnton Altaparmakov *
14890b2d0724SChristoph Hellwig * Note: I_NEW is not waited upon so you have to be very careful what you do
14900b2d0724SChristoph Hellwig * with the returned inode. You probably should be using ilookup5() instead.
149188bd5121SAnton Altaparmakov *
1492b6d0ad68SRandy Dunlap * Note2: @test is called with the inode_hash_lock held, so can't sleep.
149388bd5121SAnton Altaparmakov */
ilookup5_nowait(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)149488bd5121SAnton Altaparmakov struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
149588bd5121SAnton Altaparmakov int (*test)(struct inode *, void *), void *data)
149688bd5121SAnton Altaparmakov {
149788bd5121SAnton Altaparmakov struct hlist_head *head = inode_hashtable + hash(sb, hashval);
14980b2d0724SChristoph Hellwig struct inode *inode;
149988bd5121SAnton Altaparmakov
15000b2d0724SChristoph Hellwig spin_lock(&inode_hash_lock);
15010b2d0724SChristoph Hellwig inode = find_inode(sb, head, test, data);
15020b2d0724SChristoph Hellwig spin_unlock(&inode_hash_lock);
15030b2d0724SChristoph Hellwig
1504c2b6d621SAl Viro return IS_ERR(inode) ? NULL : inode;
150588bd5121SAnton Altaparmakov }
150688bd5121SAnton Altaparmakov EXPORT_SYMBOL(ilookup5_nowait);
150788bd5121SAnton Altaparmakov
150888bd5121SAnton Altaparmakov /**
150988bd5121SAnton Altaparmakov * ilookup5 - search for an inode in the inode cache
151088bd5121SAnton Altaparmakov * @sb: super block of file system to search
151188bd5121SAnton Altaparmakov * @hashval: hash value (usually inode number) to search for
151288bd5121SAnton Altaparmakov * @test: callback used for comparisons between inodes
151388bd5121SAnton Altaparmakov * @data: opaque data pointer to pass to @test
151488bd5121SAnton Altaparmakov *
15150b2d0724SChristoph Hellwig * Search for the inode specified by @hashval and @data in the inode cache,
15160b2d0724SChristoph Hellwig * and if the inode is in the cache, return the inode with an incremented
15170b2d0724SChristoph Hellwig * reference count. Waits on I_NEW before returning the inode.
151888bd5121SAnton Altaparmakov * returned with an incremented reference count.
15191da177e4SLinus Torvalds *
15200b2d0724SChristoph Hellwig * This is a generalized version of ilookup() for file systems where the
15210b2d0724SChristoph Hellwig * inode number is not sufficient for unique identification of an inode.
15221da177e4SLinus Torvalds *
15230b2d0724SChristoph Hellwig * Note: @test is called with the inode_hash_lock held, so can't sleep.
15241da177e4SLinus Torvalds */
ilookup5(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)15251da177e4SLinus Torvalds struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
15261da177e4SLinus Torvalds int (*test)(struct inode *, void *), void *data)
15271da177e4SLinus Torvalds {
15282864f301SAl Viro struct inode *inode;
15292864f301SAl Viro again:
15302864f301SAl Viro inode = ilookup5_nowait(sb, hashval, test, data);
15312864f301SAl Viro if (inode) {
15320b2d0724SChristoph Hellwig wait_on_inode(inode);
15332864f301SAl Viro if (unlikely(inode_unhashed(inode))) {
15342864f301SAl Viro iput(inode);
15352864f301SAl Viro goto again;
15362864f301SAl Viro }
15372864f301SAl Viro }
15380b2d0724SChristoph Hellwig return inode;
15391da177e4SLinus Torvalds }
15401da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup5);
15411da177e4SLinus Torvalds
15421da177e4SLinus Torvalds /**
15431da177e4SLinus Torvalds * ilookup - search for an inode in the inode cache
15441da177e4SLinus Torvalds * @sb: super block of file system to search
15451da177e4SLinus Torvalds * @ino: inode number to search for
15461da177e4SLinus Torvalds *
15470b2d0724SChristoph Hellwig * Search for the inode @ino in the inode cache, and if the inode is in the
15480b2d0724SChristoph Hellwig * cache, the inode is returned with an incremented reference count.
15491da177e4SLinus Torvalds */
ilookup(struct super_block * sb,unsigned long ino)15501da177e4SLinus Torvalds struct inode *ilookup(struct super_block *sb, unsigned long ino)
15511da177e4SLinus Torvalds {
15521da177e4SLinus Torvalds struct hlist_head *head = inode_hashtable + hash(sb, ino);
15530b2d0724SChristoph Hellwig struct inode *inode;
15542864f301SAl Viro again:
15550b2d0724SChristoph Hellwig spin_lock(&inode_hash_lock);
15560b2d0724SChristoph Hellwig inode = find_inode_fast(sb, head, ino);
15570b2d0724SChristoph Hellwig spin_unlock(&inode_hash_lock);
15580b2d0724SChristoph Hellwig
15592864f301SAl Viro if (inode) {
1560c2b6d621SAl Viro if (IS_ERR(inode))
1561c2b6d621SAl Viro return NULL;
15620b2d0724SChristoph Hellwig wait_on_inode(inode);
15632864f301SAl Viro if (unlikely(inode_unhashed(inode))) {
15642864f301SAl Viro iput(inode);
15652864f301SAl Viro goto again;
15662864f301SAl Viro }
15672864f301SAl Viro }
15680b2d0724SChristoph Hellwig return inode;
15691da177e4SLinus Torvalds }
15701da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup);
15711da177e4SLinus Torvalds
/**
 * find_inode_nowait - find an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @match: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @match
 *
 * Search for the inode specified by @hashval and @data in the inode
 * cache, where the helper function @match will return 0 if the inode
 * does not match, 1 if the inode does match, and -1 if the search
 * should be stopped. The @match function must be responsible for
 * taking the i_lock spin_lock and checking i_state for an inode being
 * freed or being initialized, and incrementing the reference count
 * before returning 1. It also must not sleep, since it is called with
 * the inode_hash_lock spinlock held.
 *
 * This is a even more generalized version of ilookup5() when the
 * function must never block --- find_inode() can block in
 * __wait_on_freeing_inode() --- or when the caller can not increment
 * the reference count because the resulting iput() might cause an
 * inode eviction. The tradeoff is that the @match funtion must be
 * very carefully implemented.
 */
struct inode *find_inode_nowait(struct super_block *sb,
				unsigned long hashval,
				int (*match)(struct inode *, unsigned long,
					     void *),
				void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode, *ret_inode = NULL;
	int mval;

	spin_lock(&inode_hash_lock);
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		mval = match(inode, hashval, data);
		if (mval == 0)
			continue;
		if (mval == 1)
			ret_inode = inode;
		/*
		 * Note: this goto is reached for any non-zero @match result,
		 * so both a hit (1) and a stop request (-1) end the scan;
		 * only a hit records the inode.
		 */
		goto out;
	}
out:
	spin_unlock(&inode_hash_lock);
	return ret_inode;
}
EXPORT_SYMBOL(find_inode_nowait);
1621fe032c42STheodore Ts'o
16223f19b2abSDavid Howells /**
16233f19b2abSDavid Howells * find_inode_rcu - find an inode in the inode cache
16243f19b2abSDavid Howells * @sb: Super block of file system to search
16253f19b2abSDavid Howells * @hashval: Key to hash
16263f19b2abSDavid Howells * @test: Function to test match on an inode
16273f19b2abSDavid Howells * @data: Data for test function
16283f19b2abSDavid Howells *
16293f19b2abSDavid Howells * Search for the inode specified by @hashval and @data in the inode cache,
16303f19b2abSDavid Howells * where the helper function @test will return 0 if the inode does not match
16313f19b2abSDavid Howells * and 1 if it does. The @test function must be responsible for taking the
16323f19b2abSDavid Howells * i_lock spin_lock and checking i_state for an inode being freed or being
16333f19b2abSDavid Howells * initialized.
16343f19b2abSDavid Howells *
16353f19b2abSDavid Howells * If successful, this will return the inode for which the @test function
16363f19b2abSDavid Howells * returned 1 and NULL otherwise.
16373f19b2abSDavid Howells *
16383f19b2abSDavid Howells * The @test function is not permitted to take a ref on any inode presented.
16393f19b2abSDavid Howells * It is also not permitted to sleep.
16403f19b2abSDavid Howells *
16413f19b2abSDavid Howells * The caller must hold the RCU read lock.
16423f19b2abSDavid Howells */
find_inode_rcu(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)16433f19b2abSDavid Howells struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
16443f19b2abSDavid Howells int (*test)(struct inode *, void *), void *data)
16453f19b2abSDavid Howells {
16463f19b2abSDavid Howells struct hlist_head *head = inode_hashtable + hash(sb, hashval);
16473f19b2abSDavid Howells struct inode *inode;
16483f19b2abSDavid Howells
16493f19b2abSDavid Howells RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16503f19b2abSDavid Howells "suspicious find_inode_rcu() usage");
16513f19b2abSDavid Howells
16523f19b2abSDavid Howells hlist_for_each_entry_rcu(inode, head, i_hash) {
16533f19b2abSDavid Howells if (inode->i_sb == sb &&
16543f19b2abSDavid Howells !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
16553f19b2abSDavid Howells test(inode, data))
16563f19b2abSDavid Howells return inode;
16573f19b2abSDavid Howells }
16583f19b2abSDavid Howells return NULL;
16593f19b2abSDavid Howells }
16603f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_rcu);
16613f19b2abSDavid Howells
/**
 * find_inode_by_ino_rcu - Find an inode in the inode cache
 * @sb: Super block of file system to search
 * @ino: The inode number to match
 *
 * Search for the inode with inode number @ino on @sb in the inode cache.
 * Inodes that are being freed (I_FREEING | I_WILL_FREE) are skipped.
 *
 * If successful, this will return the matching inode; NULL otherwise. No
 * reference count is taken on the returned inode.
 *
 * The caller must hold the RCU read lock.
 */
struct inode *find_inode_by_ino_rcu(struct super_block *sb,
				    unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "suspicious find_inode_by_ino_rcu() usage");

	hlist_for_each_entry_rcu(inode, head, i_hash) {
		if (inode->i_ino == ino &&
		    inode->i_sb == sb &&
		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
			return inode;
	}
	return NULL;
}
EXPORT_SYMBOL(find_inode_by_ino_rcu);
16993f19b2abSDavid Howells
/*
 * Insert @inode (with i_ino already set) into the hash, marking it I_NEW
 * and I_CREATING, unless another live inode with the same number already
 * exists on the same superblock.
 *
 * Returns 0 on success; -EBUSY if another inode with this number won the
 * race (or is itself still being created).
 */
int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			/* Inodes on their way out don't count as conflicts. */
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			/* Conflict found: exit the loop with old->i_lock held. */
			break;
		}
		if (likely(!old)) {
			/* No conflict: hash our inode and claim creation. */
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW | I_CREATING;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		/* Someone else is concurrently creating the same inode. */
		if (unlikely(old->i_state & I_CREATING)) {
			spin_unlock(&old->i_lock);
			spin_unlock(&inode_hash_lock);
			return -EBUSY;
		}
		/* Pin the conflicting inode and wait for it to settle. */
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			/* Still hashed after the wait: genuine conflict. */
			iput(old);
			return -EBUSY;
		}
		/* The old inode got evicted; drop it and retry the insert. */
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);
1746261bca86SAl Viro
insert_inode_locked4(struct inode * inode,unsigned long hashval,int (* test)(struct inode *,void *),void * data)1747261bca86SAl Viro int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1748261bca86SAl Viro int (*test)(struct inode *, void *), void *data)
1749261bca86SAl Viro {
1750c2b6d621SAl Viro struct inode *old;
1751c2b6d621SAl Viro
1752c2b6d621SAl Viro inode->i_state |= I_CREATING;
1753c2b6d621SAl Viro old = inode_insert5(inode, hashval, test, NULL, data);
1754261bca86SAl Viro
175580ea09a0SMiklos Szeredi if (old != inode) {
1756261bca86SAl Viro iput(old);
1757261bca86SAl Viro return -EBUSY;
1758261bca86SAl Viro }
175980ea09a0SMiklos Szeredi return 0;
1760261bca86SAl Viro }
1761261bca86SAl Viro EXPORT_SYMBOL(insert_inode_locked4);
1762261bca86SAl Viro
17631da177e4SLinus Torvalds
/*
 * generic_delete_inode - ->drop_inode helper that never caches
 * @inode: inode being dropped (unused)
 *
 * Always tells iput_final() to evict the inode instead of keeping it
 * on the LRU, regardless of link count.
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
17691da177e4SLinus Torvalds
177045321ac5SAl Viro /*
177145321ac5SAl Viro * Called when we're dropping the last reference
177245321ac5SAl Viro * to an inode.
177345321ac5SAl Viro *
177445321ac5SAl Viro * Call the FS "drop_inode()" function, defaulting to
177545321ac5SAl Viro * the legacy UNIX filesystem behaviour. If it tells
177645321ac5SAl Viro * us to evict inode, do so. Otherwise, retain inode
177745321ac5SAl Viro * in cache if fs is alive, sync and evict if fs is
177845321ac5SAl Viro * shutting down.
177945321ac5SAl Viro */
iput_final(struct inode * inode)178045321ac5SAl Viro static void iput_final(struct inode *inode)
17811da177e4SLinus Torvalds {
17821da177e4SLinus Torvalds struct super_block *sb = inode->i_sb;
178345321ac5SAl Viro const struct super_operations *op = inode->i_sb->s_op;
17843f19b2abSDavid Howells unsigned long state;
178545321ac5SAl Viro int drop;
17861da177e4SLinus Torvalds
/* Caller (iput) holds inode->i_lock and the refcount has just hit zero. */
1787250df6edSDave Chinner WARN_ON(inode->i_state & I_NEW);
1788250df6edSDave Chinner
/* Ask the filesystem whether to evict; default is generic_drop_inode(). */
1789e7f59097SAl Viro if (op->drop_inode)
179045321ac5SAl Viro drop = op->drop_inode(inode);
179145321ac5SAl Viro else
179245321ac5SAl Viro drop = generic_drop_inode(inode);
179345321ac5SAl Viro
/*
 * Retain the inode: park it on the LRU and we are done.  Skipped for
 * I_DONTCACHE inodes and when the superblock is no longer active.
 */
179488149082SHao Li if (!drop &&
179588149082SHao Li !(inode->i_state & I_DONTCACHE) &&
179688149082SHao Li (sb->s_flags & SB_ACTIVE)) {
179751b8c1feSJohannes Weiner __inode_add_lru(inode, true);
1798250df6edSDave Chinner spin_unlock(&inode->i_lock);
179945321ac5SAl Viro return;
1800991114c6SAlexander Viro }
1801b2b2af8eSDave Chinner
18023f19b2abSDavid Howells state = inode->i_state;
/*
 * !drop here means the fs wanted to cache the inode but the sb is
 * shutting down: sync it out under I_WILL_FREE before evicting.
 * The lock is dropped around write_inode_now(), which may sleep.
 */
1803b2b2af8eSDave Chinner if (!drop) {
18043f19b2abSDavid Howells WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
1805250df6edSDave Chinner spin_unlock(&inode->i_lock);
18063f19b2abSDavid Howells
18071da177e4SLinus Torvalds write_inode_now(inode, 1);
18083f19b2abSDavid Howells
1809250df6edSDave Chinner spin_lock(&inode->i_lock);
18103f19b2abSDavid Howells state = inode->i_state;
18113f19b2abSDavid Howells WARN_ON(state & I_NEW);
18123f19b2abSDavid Howells state &= ~I_WILL_FREE;
18131da177e4SLinus Torvalds }
18147ccf19a8SNick Piggin
/* Point of no return: mark I_FREEING, leave the LRU, then evict. */
18153f19b2abSDavid Howells WRITE_ONCE(inode->i_state, state | I_FREEING);
1816c4ae0c65SEric Dumazet if (!list_empty(&inode->i_lru))
18179e38d86fSNick Piggin inode_lru_list_del(inode);
1818250df6edSDave Chinner spin_unlock(&inode->i_lock);
1819b2b2af8eSDave Chinner
1820b2b2af8eSDave Chinner evict(inode);
18211da177e4SLinus Torvalds }
18221da177e4SLinus Torvalds
18231da177e4SLinus Torvalds /**
18241da177e4SLinus Torvalds * iput - put an inode
18251da177e4SLinus Torvalds * @inode: inode to put
18261da177e4SLinus Torvalds *
18271da177e4SLinus Torvalds * Puts an inode, dropping its usage count. If the inode use count hits
18281da177e4SLinus Torvalds * zero, the inode is then freed and may also be destroyed.
18291da177e4SLinus Torvalds *
18301da177e4SLinus Torvalds * Consequently, iput() can sleep.
18311da177e4SLinus Torvalds */
iput(struct inode * inode)18321da177e4SLinus Torvalds void iput(struct inode *inode)
18331da177e4SLinus Torvalds {
/* iput(NULL) is a no-op. */
18340ae45f63STheodore Ts'o if (!inode)
18350ae45f63STheodore Ts'o return;
18361da177e4SLinus Torvalds BUG_ON(inode->i_state & I_CLEAR)
1836a4ffdde6SAl Viro BUG_ON(inode->i_state & I_CLEAR);
18370ae45f63STheodore Ts'o retry:
/* Drop one reference; if it was the last, i_lock is taken as well. */
18380ae45f63STheodore Ts'o if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
/*
 * Last put of a still-linked inode with lazytime timestamps
 * pending: take the reference back, flush the timestamps to
 * the filesystem, then retry the final put.
 */
18390ae45f63STheodore Ts'o if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
18400ae45f63STheodore Ts'o atomic_inc(&inode->i_count);
18410ae45f63STheodore Ts'o spin_unlock(&inode->i_lock);
18420ae45f63STheodore Ts'o trace_writeback_lazytime_iput(inode);
18430ae45f63STheodore Ts'o mark_inode_dirty_sync(inode);
18440ae45f63STheodore Ts'o goto retry;
18450ae45f63STheodore Ts'o }
/* iput_final() releases i_lock. */
18461da177e4SLinus Torvalds iput_final(inode);
18471da177e4SLinus Torvalds }
18481da177e4SLinus Torvalds }
18491da177e4SLinus Torvalds EXPORT_SYMBOL(iput);
18501da177e4SLinus Torvalds
185130460e1eSCarlos Maiolino #ifdef CONFIG_BLOCK
18521da177e4SLinus Torvalds /**
18531da177e4SLinus Torvalds * bmap - find a block number in a file
185430460e1eSCarlos Maiolino * @inode: inode owning the block number being requested
185530460e1eSCarlos Maiolino * @block: pointer containing the block to find
18561da177e4SLinus Torvalds *
18572b8e8b55SMauro Carvalho Chehab * Replaces the value in ``*block`` with the block number on the device
185830460e1eSCarlos Maiolino * holding the data corresponding to the requested block number in the file.
185930460e1eSCarlos Maiolino * That is, asked for block 4 of inode 1 the function will replace the
18602b8e8b55SMauro Carvalho Chehab * 4 in ``*block``, with disk block relative to the disk start that holds that
186130460e1eSCarlos Maiolino * block of the file.
186230460e1eSCarlos Maiolino *
186330460e1eSCarlos Maiolino * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
18642b8e8b55SMauro Carvalho Chehab * hole, returns 0 and ``*block`` is also set to 0.
18651da177e4SLinus Torvalds */
/*
 * Translate the file block number in *block into the on-device block
 * number via the address_space's ->bmap method.  Returns -EINVAL when
 * the filesystem provides no ->bmap, 0 otherwise.
 */
int bmap(struct inode *inode, sector_t *block)
{
	struct address_space *mapping = inode->i_mapping;

	/* Filesystems without ->bmap cannot map file blocks to disk blocks. */
	if (!mapping->a_ops->bmap)
		return -EINVAL;

	*block = mapping->a_ops->bmap(mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
187530460e1eSCarlos Maiolino #endif
18761da177e4SLinus Torvalds
187711ff6f05SMatthew Garrett /*
187811ff6f05SMatthew Garrett * With relative atime, only update atime if the previous atime is
1879d98ffa1aSStephen Kitt * earlier than or equal to either the ctime or mtime,
1880d98ffa1aSStephen Kitt * or if at least a day has passed since the last atime update.
188111ff6f05SMatthew Garrett */
relatime_need_update(struct vfsmount * mnt,struct inode * inode,struct timespec64 now)1882c6718543SMiklos Szeredi static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
18836f22b664SArnd Bergmann struct timespec64 now)
188411ff6f05SMatthew Garrett {
18852276e5baSJeff Layton struct timespec64 ctime;
188611ff6f05SMatthew Garrett
1887c6718543SMiklos Szeredi if (!(mnt->mnt_flags & MNT_RELATIME))
188811ff6f05SMatthew Garrett return 1;
188911ff6f05SMatthew Garrett /*
1890d98ffa1aSStephen Kitt * Is mtime younger than or equal to atime? If yes, update atime:
189111ff6f05SMatthew Garrett */
189295582b00SDeepa Dinamani if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
189311ff6f05SMatthew Garrett return 1;
189411ff6f05SMatthew Garrett /*
1895d98ffa1aSStephen Kitt * Is ctime younger than or equal to atime? If yes, update atime:
189611ff6f05SMatthew Garrett */
18972276e5baSJeff Layton ctime = inode_get_ctime(inode);
18982276e5baSJeff Layton if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
189911ff6f05SMatthew Garrett return 1;
190011ff6f05SMatthew Garrett
190111ff6f05SMatthew Garrett /*
190211ff6f05SMatthew Garrett * Is the previous atime value older than a day? If yes,
190311ff6f05SMatthew Garrett * update atime:
190411ff6f05SMatthew Garrett */
190511ff6f05SMatthew Garrett if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
190611ff6f05SMatthew Garrett return 1;
190711ff6f05SMatthew Garrett /*
190811ff6f05SMatthew Garrett * Good, we can skip the atime update:
190911ff6f05SMatthew Garrett */
191011ff6f05SMatthew Garrett return 0;
191111ff6f05SMatthew Garrett }
191211ff6f05SMatthew Garrett
1913541d4c79SJeff Layton /**
1914541d4c79SJeff Layton * inode_update_timestamps - update the timestamps on the inode
1915541d4c79SJeff Layton * @inode: inode to be updated
1916541d4c79SJeff Layton * @flags: S_* flags that needed to be updated
1917541d4c79SJeff Layton *
1918541d4c79SJeff Layton * The update_time function is called when an inode's timestamps need to be
1919541d4c79SJeff Layton * updated for a read or write operation. This function handles updating the
1920541d4c79SJeff Layton * actual timestamps. It's up to the caller to ensure that the inode is marked
1921541d4c79SJeff Layton * dirty appropriately.
1922541d4c79SJeff Layton *
1923541d4c79SJeff Layton * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
1924541d4c79SJeff Layton * attempt to update all three of them. S_ATIME updates can be handled
1925541d4c79SJeff Layton * independently of the rest.
1926541d4c79SJeff Layton *
1927541d4c79SJeff Layton * Returns a set of S_* flags indicating which values changed.
1928541d4c79SJeff Layton */
inode_update_timestamps(struct inode * inode,int flags)1929541d4c79SJeff Layton int inode_update_timestamps(struct inode *inode, int flags)
1930c3b2da31SJosef Bacik {
1931541d4c79SJeff Layton int updated = 0;
1932541d4c79SJeff Layton struct timespec64 now;
1933c3b2da31SJosef Bacik
/* ctime is bumped first and serves as the "now" mtime/atime are set to. */
1934541d4c79SJeff Layton if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
1935541d4c79SJeff Layton struct timespec64 ctime = inode_get_ctime(inode);
19360ae45f63STheodore Ts'o
1937541d4c79SJeff Layton now = inode_set_ctime_current(inode);
1938541d4c79SJeff Layton if (!timespec64_equal(&now, &ctime))
1939541d4c79SJeff Layton updated |= S_CTIME;
1940541d4c79SJeff Layton if (!timespec64_equal(&now, &inode->i_mtime)) {
1941541d4c79SJeff Layton inode->i_mtime = now;
1942541d4c79SJeff Layton updated |= S_MTIME;
1943541d4c79SJeff Layton }
/* Passing the accumulated flags lets iversion skip a no-op bump. */
1944541d4c79SJeff Layton if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
1945541d4c79SJeff Layton updated |= S_VERSION;
1946541d4c79SJeff Layton } else {
/* atime-only request: no ctime change, just grab the current time. */
1947541d4c79SJeff Layton now = current_time(inode);
1948e20b14dbSEric Biggers }
1949e20b14dbSEric Biggers
1950541d4c79SJeff Layton if (flags & S_ATIME) {
1951541d4c79SJeff Layton if (!timespec64_equal(&now, &inode->i_atime)) {
1952541d4c79SJeff Layton inode->i_atime = now;
1953541d4c79SJeff Layton updated |= S_ATIME;
1954541d4c79SJeff Layton }
1955541d4c79SJeff Layton }
/* Mask of S_* fields that actually changed; caller handles dirtying. */
1956541d4c79SJeff Layton return updated;
1957541d4c79SJeff Layton }
1958541d4c79SJeff Layton EXPORT_SYMBOL(inode_update_timestamps);
1959e20b14dbSEric Biggers
1960541d4c79SJeff Layton /**
1961541d4c79SJeff Layton * generic_update_time - update the timestamps on the inode
1962541d4c79SJeff Layton * @inode: inode to be updated
1963541d4c79SJeff Layton * @flags: S_* flags that needed to be updated
1964541d4c79SJeff Layton *
1965541d4c79SJeff Layton * The update_time function is called when an inode's timestamps need to be
1966541d4c79SJeff Layton * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
1967541d4c79SJeff Layton * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
1968541d4c79SJeff Layton * updates can be handled independently of the rest.
1969541d4c79SJeff Layton *
1970541d4c79SJeff Layton * Returns a S_* mask indicating which fields were updated.
1971541d4c79SJeff Layton */
generic_update_time(struct inode * inode,int flags)1972541d4c79SJeff Layton int generic_update_time(struct inode *inode, int flags)
1973541d4c79SJeff Layton {
1974541d4c79SJeff Layton int updated = inode_update_timestamps(inode, flags);
1975541d4c79SJeff Layton int dirty_flags = 0;
1976541d4c79SJeff Layton
1977541d4c79SJeff Layton if (updated & (S_ATIME|S_MTIME|S_CTIME))
1978541d4c79SJeff Layton dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
1979541d4c79SJeff Layton if (updated & S_VERSION)
1980541d4c79SJeff Layton dirty_flags |= I_DIRTY_SYNC;
1981e20b14dbSEric Biggers __mark_inode_dirty(inode, dirty_flags);
1982541d4c79SJeff Layton return updated;
1983c3b2da31SJosef Bacik }
19840ae45f63STheodore Ts'o EXPORT_SYMBOL(generic_update_time);
19850ae45f63STheodore Ts'o
19860ae45f63STheodore Ts'o /*
19870ae45f63STheodore Ts'o * This does the actual work of updating an inodes time or version. Must have
19880ae45f63STheodore Ts'o * had called mnt_want_write() before calling this.
19890ae45f63STheodore Ts'o */
inode_update_time(struct inode * inode,int flags)1990913e9928SJeff Layton int inode_update_time(struct inode *inode, int flags)
19910ae45f63STheodore Ts'o {
199223b424d9SDeepa Dinamani if (inode->i_op->update_time)
1993913e9928SJeff Layton return inode->i_op->update_time(inode, flags);
1994541d4c79SJeff Layton generic_update_time(inode, flags);
1995541d4c79SJeff Layton return 0;
19960ae45f63STheodore Ts'o }
1997e60feb44SJosef Bacik EXPORT_SYMBOL(inode_update_time);
1998c3b2da31SJosef Bacik
19991da177e4SLinus Torvalds /**
2000961f3c89SMauro Carvalho Chehab * atime_needs_update - update the access time
2001185553b2SRandy Dunlap * @path: the &struct path to update
200230fdc8eeSRandy Dunlap * @inode: inode to update
20031da177e4SLinus Torvalds *
20041da177e4SLinus Torvalds * Determine whether the access time on an inode needs to be updated.
20051da177e4SLinus Torvalds * This function automatically handles read only file systems and media,
20061da177e4SLinus Torvalds * as well as the "noatime" flag and inode specific "noatime" markers.
20071da177e4SLinus Torvalds */
atime_needs_update(const struct path * path,struct inode * inode)2008c6718543SMiklos Szeredi bool atime_needs_update(const struct path *path, struct inode *inode)
20098fa9dd24SNeilBrown {
20108fa9dd24SNeilBrown struct vfsmount *mnt = path->mnt;
201195582b00SDeepa Dinamani struct timespec64 now;
20128fa9dd24SNeilBrown
20138fa9dd24SNeilBrown if (inode->i_flags & S_NOATIME)
20148fa9dd24SNeilBrown return false;
20150bd23d09SEric W. Biederman
20160bd23d09SEric W. Biederman /* Atime updates will likely cause i_uid and i_gid to be written
20170bd23d09SEric W. Biederman * back improprely if their true value is unknown to the vfs.
20180bd23d09SEric W. Biederman */
20194609e1f1SChristian Brauner if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
20200bd23d09SEric W. Biederman return false;
20210bd23d09SEric W. Biederman
20228fa9dd24SNeilBrown if (IS_NOATIME(inode))
20238fa9dd24SNeilBrown return false;
20241751e8a6SLinus Torvalds if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
20258fa9dd24SNeilBrown return false;
20268fa9dd24SNeilBrown
20278fa9dd24SNeilBrown if (mnt->mnt_flags & MNT_NOATIME)
20288fa9dd24SNeilBrown return false;
20298fa9dd24SNeilBrown if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
20308fa9dd24SNeilBrown return false;
20318fa9dd24SNeilBrown
2032c2050a45SDeepa Dinamani now = current_time(inode);
20338fa9dd24SNeilBrown
20346f22b664SArnd Bergmann if (!relatime_need_update(mnt, inode, now))
20358fa9dd24SNeilBrown return false;
20368fa9dd24SNeilBrown
203795582b00SDeepa Dinamani if (timespec64_equal(&inode->i_atime, &now))
20388fa9dd24SNeilBrown return false;
20398fa9dd24SNeilBrown
20408fa9dd24SNeilBrown return true;
20418fa9dd24SNeilBrown }
20428fa9dd24SNeilBrown
touch_atime(const struct path * path)2043badcf2b7SAl Viro void touch_atime(const struct path *path)
20441da177e4SLinus Torvalds {
204568ac1234SAl Viro struct vfsmount *mnt = path->mnt;
2046df2b1afdSDavid Howells struct inode *inode = d_inode(path->dentry);
20471da177e4SLinus Torvalds
/* Policy checks (noatime, relatime, unchanged atime) come first. */
2048c6718543SMiklos Szeredi if (!atime_needs_update(path, inode))
2049b12536c2SAndi Kleen return;
2050b12536c2SAndi Kleen
/* Filesystem frozen: silently skip the update rather than block. */
20515d37e9e6SJan Kara if (!sb_start_write_trylock(inode->i_sb))
2052b12536c2SAndi Kleen return;
205347ae32d6SValerie Henson
/* No write access to the mount (e.g. read-only): skip the update. */
20548fa9dd24SNeilBrown if (__mnt_want_write(mnt) != 0)
20555d37e9e6SJan Kara goto skip_update;
2056c3b2da31SJosef Bacik /*
2057c3b2da31SJosef Bacik * File systems can error out when updating inodes if they need to
2058c3b2da31SJosef Bacik * allocate new space to modify an inode (such is the case for
2059c3b2da31SJosef Bacik * Btrfs), but since we touch atime while walking down the path we
2060c3b2da31SJosef Bacik * really don't care if we failed to update the atime of the file,
2061c3b2da31SJosef Bacik * so just ignore the return value.
20622bc55652SAlexander Block * We may also fail on filesystems that have the ability to make parts
20632bc55652SAlexander Block * of the fs read only, e.g. subvolumes in Btrfs.
2064c3b2da31SJosef Bacik */
2065913e9928SJeff Layton inode_update_time(inode, S_ATIME);
20665d37e9e6SJan Kara __mnt_drop_write(mnt);
20675d37e9e6SJan Kara skip_update:
20685d37e9e6SJan Kara sb_end_write(inode->i_sb);
20691da177e4SLinus Torvalds }
20702070869243a0SChristoph Hellwig EXPORT_SYMBOL(touch_atime)
2070869243a0SChristoph Hellwig EXPORT_SYMBOL(touch_atime);
20711da177e4SLinus Torvalds
20723ed37648SCong Wang /*
2073dbfae0cdSJan Kara * Return mask of changes for notify_change() that need to be done as a
2074dbfae0cdSJan Kara * response to write or truncate. Return 0 if nothing has to be changed.
2075dbfae0cdSJan Kara * Negative value on error (change should be denied).
2076dbfae0cdSJan Kara */
dentry_needs_remove_privs(struct mnt_idmap * idmap,struct dentry * dentry)20779452e93eSChristian Brauner int dentry_needs_remove_privs(struct mnt_idmap *idmap,
2078ed5a7047SChristian Brauner struct dentry *dentry)
2079dbfae0cdSJan Kara {
2080dbfae0cdSJan Kara struct inode *inode = d_inode(dentry);
2081dbfae0cdSJan Kara int mask = 0;
2082dbfae0cdSJan Kara int ret;
2083dbfae0cdSJan Kara
2084dbfae0cdSJan Kara if (IS_NOSEC(inode))
2085dbfae0cdSJan Kara return 0;
2086dbfae0cdSJan Kara
20879452e93eSChristian Brauner mask = setattr_should_drop_suidgid(idmap, inode);
2088dbfae0cdSJan Kara ret = security_inode_need_killpriv(dentry);
2089dbfae0cdSJan Kara if (ret < 0)
2090dbfae0cdSJan Kara return ret;
2091dbfae0cdSJan Kara if (ret)
2092dbfae0cdSJan Kara mask |= ATTR_KILL_PRIV;
2093dbfae0cdSJan Kara return mask;
2094dbfae0cdSJan Kara }
2095dbfae0cdSJan Kara
__remove_privs(struct mnt_idmap * idmap,struct dentry * dentry,int kill)2096abf08576SChristian Brauner static int __remove_privs(struct mnt_idmap *idmap,
2097643fe55aSChristian Brauner struct dentry *dentry, int kill)
20983ed37648SCong Wang {
20993ed37648SCong Wang struct iattr newattrs;
21003ed37648SCong Wang
21013ed37648SCong Wang newattrs.ia_valid = ATTR_FORCE | kill;
210227ac0ffeSJ. Bruce Fields /*
210327ac0ffeSJ. Bruce Fields * Note we call this on write, so notify_change will not
210427ac0ffeSJ. Bruce Fields * encounter any conflicting delegations:
210527ac0ffeSJ. Bruce Fields */
2106abf08576SChristian Brauner return notify_change(idmap, dentry, &newattrs, NULL);
21073ed37648SCong Wang }
21083ed37648SCong Wang
__file_remove_privs(struct file * file,unsigned int flags)2109faf99b56SStefan Roesch static int __file_remove_privs(struct file *file, unsigned int flags)
21103ed37648SCong Wang {
2111c1892c37SMiklos Szeredi struct dentry *dentry = file_dentry(file);
2112c1892c37SMiklos Szeredi struct inode *inode = file_inode(file);
211341191cf6SStefan Roesch int error = 0;
2114dbfae0cdSJan Kara int kill;
21153ed37648SCong Wang
2116f69e749aSAlexander Lochmann if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
21173ed37648SCong Wang return 0;
21183ed37648SCong Wang
21199452e93eSChristian Brauner kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
212041191cf6SStefan Roesch if (kill < 0)
2121dbfae0cdSJan Kara return kill;
2122faf99b56SStefan Roesch
212341191cf6SStefan Roesch if (kill) {
2124faf99b56SStefan Roesch if (flags & IOCB_NOWAIT)
2125faf99b56SStefan Roesch return -EAGAIN;
2126faf99b56SStefan Roesch
2127abf08576SChristian Brauner error = __remove_privs(file_mnt_idmap(file), dentry, kill);
212841191cf6SStefan Roesch }
212941191cf6SStefan Roesch
21302426f391SJan Kara if (!error)
21312426f391SJan Kara inode_has_no_xattr(inode);
21323ed37648SCong Wang return error;
21333ed37648SCong Wang }
2134faf99b56SStefan Roesch
2135faf99b56SStefan Roesch /**
2136faf99b56SStefan Roesch * file_remove_privs - remove special file privileges (suid, capabilities)
2137faf99b56SStefan Roesch * @file: file to remove privileges from
2138faf99b56SStefan Roesch *
2139faf99b56SStefan Roesch * When file is modified by a write or truncation ensure that special
2140faf99b56SStefan Roesch * file privileges are removed.
2141faf99b56SStefan Roesch *
2142faf99b56SStefan Roesch * Return: 0 on success, negative errno on failure.
2143faf99b56SStefan Roesch */
int file_remove_privs(struct file *file)
{
	/* Blocking variant: no IOCB_NOWAIT semantics. */
	return __file_remove_privs(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);
21493ed37648SCong Wang
inode_needs_update_time(struct inode * inode)2150913e9928SJeff Layton static int inode_needs_update_time(struct inode *inode)
21511da177e4SLinus Torvalds {
2152c3b2da31SJosef Bacik int sync_it = 0;
2153647aa768SChristian Brauner struct timespec64 now = current_time(inode);
21542276e5baSJeff Layton struct timespec64 ctime;
21551da177e4SLinus Torvalds
2156ce06e0b2SAndi Kleen /* First try to exhaust all avenues to not sync */
21571da177e4SLinus Torvalds if (IS_NOCMTIME(inode))
2158c3b2da31SJosef Bacik return 0;
215920ddee2cSDave Hansen
2160913e9928SJeff Layton if (!timespec64_equal(&inode->i_mtime, &now))
2161ce06e0b2SAndi Kleen sync_it = S_MTIME;
2162ce06e0b2SAndi Kleen
21632276e5baSJeff Layton ctime = inode_get_ctime(inode);
2164913e9928SJeff Layton if (!timespec64_equal(&ctime, &now))
2165ce06e0b2SAndi Kleen sync_it |= S_CTIME;
2166ce06e0b2SAndi Kleen
2167e38cf302SJeff Layton if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
2168ce06e0b2SAndi Kleen sync_it |= S_VERSION;
2169ce06e0b2SAndi Kleen
21706a2aa5d8SStefan Roesch return sync_it;
21716a2aa5d8SStefan Roesch }
2172ed97bd37SAndreas Mohr
/*
 * Apply the timestamp updates described by @sync_mode, but only if we
 * can obtain write access to the mount; otherwise silently return 0.
 */
static int __file_update_time(struct file *file, int sync_mode)
{
	struct inode *inode = file_inode(file);
	int ret = 0;

	if (!__mnt_want_write_file(file)) {
		ret = inode_update_time(inode, sync_mode);
		__mnt_drop_write_file(file);
	}

	return ret;
}
21866a2aa5d8SStefan Roesch
21876a2aa5d8SStefan Roesch /**
21886a2aa5d8SStefan Roesch * file_update_time - update mtime and ctime time
21896a2aa5d8SStefan Roesch * @file: file accessed
21906a2aa5d8SStefan Roesch *
21916a2aa5d8SStefan Roesch * Update the mtime and ctime members of an inode and mark the inode for
21926a2aa5d8SStefan Roesch * writeback. Note that this function is meant exclusively for usage in
21936a2aa5d8SStefan Roesch * the file write path of filesystems, and filesystems may choose to
21946a2aa5d8SStefan Roesch * explicitly ignore updates via this function with the _NOCMTIME inode
21956a2aa5d8SStefan Roesch * flag, e.g. for network filesystems where these timestamps are handled
21966a2aa5d8SStefan Roesch * by the server. This can return an error for file systems who need to
21976a2aa5d8SStefan Roesch * allocate space in order to update an inode.
21986a2aa5d8SStefan Roesch *
21996a2aa5d8SStefan Roesch * Return: 0 on success, negative errno on failure.
22006a2aa5d8SStefan Roesch */
file_update_time(struct file * file)22016a2aa5d8SStefan Roesch int file_update_time(struct file *file)
22026a2aa5d8SStefan Roesch {
22036a2aa5d8SStefan Roesch int ret;
22046a2aa5d8SStefan Roesch struct inode *inode = file_inode(file);
22056a2aa5d8SStefan Roesch
2206913e9928SJeff Layton ret = inode_needs_update_time(inode);
22076a2aa5d8SStefan Roesch if (ret <= 0)
22086a2aa5d8SStefan Roesch return ret;
22096a2aa5d8SStefan Roesch
2210913e9928SJeff Layton return __file_update_time(file, ret);
22116a2aa5d8SStefan Roesch }
2212870f4817SChristoph Hellwig EXPORT_SYMBOL(file_update_time);
22131da177e4SLinus Torvalds
2214faf99b56SStefan Roesch /**
221566fa3cedSStefan Roesch * file_modified_flags - handle mandated vfs changes when modifying a file
221666fa3cedSStefan Roesch * @file: file that was modified
221766fa3cedSStefan Roesch * @flags: kiocb flags
221866fa3cedSStefan Roesch *
221966fa3cedSStefan Roesch * When file has been modified ensure that special
222066fa3cedSStefan Roesch * file privileges are removed and time settings are updated.
222166fa3cedSStefan Roesch *
222266fa3cedSStefan Roesch * If IOCB_NOWAIT is set, special file privileges will not be removed and
222366fa3cedSStefan Roesch * time settings will not be updated. It will return -EAGAIN.
222466fa3cedSStefan Roesch *
222566fa3cedSStefan Roesch * Context: Caller must hold the file's inode lock.
222666fa3cedSStefan Roesch *
222766fa3cedSStefan Roesch * Return: 0 on success, negative errno on failure.
222866fa3cedSStefan Roesch */
file_modified_flags(struct file * file,int flags)222966fa3cedSStefan Roesch static int file_modified_flags(struct file *file, int flags)
223066fa3cedSStefan Roesch {
223166fa3cedSStefan Roesch int ret;
223266fa3cedSStefan Roesch struct inode *inode = file_inode(file);
223366fa3cedSStefan Roesch
223466fa3cedSStefan Roesch /*
223566fa3cedSStefan Roesch * Clear the security bits if the process is not being run by root.
223666fa3cedSStefan Roesch * This keeps people from modifying setuid and setgid binaries.
223766fa3cedSStefan Roesch */
223866fa3cedSStefan Roesch ret = __file_remove_privs(file, flags);
223966fa3cedSStefan Roesch if (ret)
224066fa3cedSStefan Roesch return ret;
224166fa3cedSStefan Roesch
224266fa3cedSStefan Roesch if (unlikely(file->f_mode & FMODE_NOCMTIME))
224366fa3cedSStefan Roesch return 0;
224466fa3cedSStefan Roesch
2245913e9928SJeff Layton ret = inode_needs_update_time(inode);
224666fa3cedSStefan Roesch if (ret <= 0)
224766fa3cedSStefan Roesch return ret;
224866fa3cedSStefan Roesch if (flags & IOCB_NOWAIT)
224966fa3cedSStefan Roesch return -EAGAIN;
225066fa3cedSStefan Roesch
2251913e9928SJeff Layton return __file_update_time(file, ret);
225266fa3cedSStefan Roesch }
225366fa3cedSStefan Roesch
225466fa3cedSStefan Roesch /**
2255faf99b56SStefan Roesch * file_modified - handle mandated vfs changes when modifying a file
2256faf99b56SStefan Roesch * @file: file that was modified
2257faf99b56SStefan Roesch *
2258faf99b56SStefan Roesch * When file has been modified ensure that special
2259faf99b56SStefan Roesch * file privileges are removed and time settings are updated.
2260faf99b56SStefan Roesch *
2261faf99b56SStefan Roesch * Context: Caller must hold the file's inode lock.
2262faf99b56SStefan Roesch *
2263faf99b56SStefan Roesch * Return: 0 on success, negative errno on failure.
2264faf99b56SStefan Roesch */
int file_modified(struct file *file)
{
	/* Blocking variant: no kiocb flags to honour. */
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
2270e38f7f53SAmir Goldstein
227166fa3cedSStefan Roesch /**
227266fa3cedSStefan Roesch * kiocb_modified - handle mandated vfs changes when modifying a file
227366fa3cedSStefan Roesch * @iocb: iocb that was modified
227466fa3cedSStefan Roesch *
227566fa3cedSStefan Roesch * When file has been modified ensure that special
227666fa3cedSStefan Roesch * file privileges are removed and time settings are updated.
227766fa3cedSStefan Roesch *
227866fa3cedSStefan Roesch * Context: Caller must hold the file's inode lock.
227966fa3cedSStefan Roesch *
228066fa3cedSStefan Roesch * Return: 0 on success, negative errno on failure.
228166fa3cedSStefan Roesch */
kiocb_modified(struct kiocb * iocb)228266fa3cedSStefan Roesch int kiocb_modified(struct kiocb *iocb)
228366fa3cedSStefan Roesch {
228466fa3cedSStefan Roesch return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
228566fa3cedSStefan Roesch }
228666fa3cedSStefan Roesch EXPORT_SYMBOL_GPL(kiocb_modified);
228766fa3cedSStefan Roesch
inode_needs_sync(struct inode * inode)22881da177e4SLinus Torvalds int inode_needs_sync(struct inode *inode)
22891da177e4SLinus Torvalds {
22901da177e4SLinus Torvalds if (IS_SYNC(inode))
22911da177e4SLinus Torvalds return 1;
22921da177e4SLinus Torvalds if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
22931da177e4SLinus Torvalds return 1;
22941da177e4SLinus Torvalds return 0;
22951da177e4SLinus Torvalds }
22961da177e4SLinus Torvalds EXPORT_SYMBOL(inode_needs_sync);
22971da177e4SLinus Torvalds
22981da177e4SLinus Torvalds /*
2299168a9fd6SMiklos Szeredi * If we try to find an inode in the inode hash while it is being
2300168a9fd6SMiklos Szeredi * deleted, we have to wait until the filesystem completes its
2301168a9fd6SMiklos Szeredi * deletion before reporting that it isn't found. This function waits
2302168a9fd6SMiklos Szeredi * until the deletion _might_ have completed. Callers are responsible
2303168a9fd6SMiklos Szeredi * to recheck inode state.
2304168a9fd6SMiklos Szeredi *
2305eaff8079SChristoph Hellwig * It doesn't matter if I_NEW is not set initially, a call to
2306250df6edSDave Chinner * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
2307250df6edSDave Chinner * will DTRT.
23081da177e4SLinus Torvalds */
__wait_on_freeing_inode(struct inode * inode)23091da177e4SLinus Torvalds static void __wait_on_freeing_inode(struct inode *inode)
23101da177e4SLinus Torvalds {
23111da177e4SLinus Torvalds wait_queue_head_t *wq;
2312eaff8079SChristoph Hellwig DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
2313eaff8079SChristoph Hellwig wq = bit_waitqueue(&inode->i_state, __I_NEW);
231421417136SIngo Molnar prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
/*
 * Entered with inode->i_lock and inode_hash_lock held; both must be
 * dropped before sleeping.  Only inode_hash_lock is retaken: the
 * caller revalidates its hash lookup, and the inode may be gone.
 */
2315250df6edSDave Chinner spin_unlock(&inode->i_lock);
231667a23c49SDave Chinner spin_unlock(&inode_hash_lock);
23171da177e4SLinus Torvalds schedule();
231821417136SIngo Molnar finish_wait(wq, &wait.wq_entry);
231967a23c49SDave Chinner spin_lock(&inode_hash_lock);
23201da177e4SLinus Torvalds }
23211da177e4SLinus Torvalds
23221da177e4SLinus Torvalds static __initdata unsigned long ihash_entries;
set_ihash_entries(char * str)23231da177e4SLinus Torvalds static int __init set_ihash_entries(char *str)
23241da177e4SLinus Torvalds {
23251da177e4SLinus Torvalds if (!str)
23261da177e4SLinus Torvalds return 0;
23271da177e4SLinus Torvalds ihash_entries = simple_strtoul(str, &str, 0);
23281da177e4SLinus Torvalds return 1;
23291da177e4SLinus Torvalds }
23301da177e4SLinus Torvalds __setup("ihash_entries=", set_ihash_entries);
23311da177e4SLinus Torvalds
23321da177e4SLinus Torvalds /*
23331da177e4SLinus Torvalds * Initialize the waitqueues and inode hash table.
23341da177e4SLinus Torvalds */
/*
 * Boot-time allocation of the inode hash table, unless hashdist defers
 * it to inode_init() (after vmalloc space becomes available).
 */
inode_init_early(void)23351da177e4SLinus Torvalds void __init inode_init_early(void)
23361da177e4SLinus Torvalds {
23371da177e4SLinus Torvalds /* If hashes are distributed across NUMA nodes, defer
23381da177e4SLinus Torvalds * hash allocation until vmalloc space is available.
23391da177e4SLinus Torvalds */
23401da177e4SLinus Torvalds if (hashdist)
23411da177e4SLinus Torvalds return;
23421da177e4SLinus Torvalds
23431da177e4SLinus Torvalds inode_hashtable =
23441da177e4SLinus Torvalds alloc_large_system_hash("Inode-cache",
23451da177e4SLinus Torvalds sizeof(struct hlist_head),
23461da177e4SLinus Torvalds ihash_entries,
/* 14 is the scale factor used to size the table from total memory. */
23471da177e4SLinus Torvalds 14,
23483d375d78SPavel Tatashin HASH_EARLY | HASH_ZERO,
23491da177e4SLinus Torvalds &i_hash_shift,
23501da177e4SLinus Torvalds &i_hash_mask,
235131fe62b9STim Bird 0,
23521da177e4SLinus Torvalds 0);
23531da177e4SLinus Torvalds }
23541da177e4SLinus Torvalds
/* Create the inode slab cache and, with hashdist, the inode hash table. */
inode_init(void)235574bf17cfSDenis Cheng void __init inode_init(void)
23561da177e4SLinus Torvalds {
23571da177e4SLinus Torvalds /* inode slab cache */
/* SLAB_PANIC: the kernel cannot operate without this cache. */
2358b0196009SPaul Jackson inode_cachep = kmem_cache_create("inode_cache",
2359b0196009SPaul Jackson sizeof(struct inode),
2360b0196009SPaul Jackson 0,
2361b0196009SPaul Jackson (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
23625d097056SVladimir Davydov SLAB_MEM_SPREAD|SLAB_ACCOUNT),
236320c2df83SPaul Mundt init_once);
23641da177e4SLinus Torvalds
23651da177e4SLinus Torvalds /* Hash may have been set up in inode_init_early */
23661da177e4SLinus Torvalds if (!hashdist)
23671da177e4SLinus Torvalds return;
23681da177e4SLinus Torvalds
23691da177e4SLinus Torvalds inode_hashtable =
23701da177e4SLinus Torvalds alloc_large_system_hash("Inode-cache",
23711da177e4SLinus Torvalds sizeof(struct hlist_head),
23721da177e4SLinus Torvalds ihash_entries,
23731da177e4SLinus Torvalds 14,
23743d375d78SPavel Tatashin HASH_ZERO,
23751da177e4SLinus Torvalds &i_hash_shift,
23761da177e4SLinus Torvalds &i_hash_mask,
237731fe62b9STim Bird 0,
23781da177e4SLinus Torvalds 0);
23791da177e4SLinus Torvalds }
23801da177e4SLinus Torvalds
/*
 * Set up i_mode, i_fop and (for device nodes) i_rdev for a special
 * inode: character/block device, FIFO, or socket.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	switch (mode & S_IFMT) {
	case S_IFCHR:
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFBLK:
		if (IS_ENABLED(CONFIG_BLOCK))
			inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		break;
	case S_IFIFO:
		inode->i_fop = &pipefifo_fops;
		break;
	case S_IFSOCK:
		/* leave it no_open_fops */
		break;
	default:
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
		       " inode %s:%lu\n", mode, inode->i_sb->s_id,
		       inode->i_ino);
		break;
	}
}
EXPORT_SYMBOL(init_special_inode);
2401a1bd120dSDmitry Monakhov
2402a1bd120dSDmitry Monakhov /**
2403eaae668dSBen Hutchings * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
2404f2d40141SChristian Brauner * @idmap: idmap of the mount the inode was created from
2405a1bd120dSDmitry Monakhov * @inode: New inode
2406a1bd120dSDmitry Monakhov * @dir: Directory inode
2407a1bd120dSDmitry Monakhov * @mode: mode of the new inode
240821cb47beSChristian Brauner *
2409f2d40141SChristian Brauner * If the inode has been created through an idmapped mount the idmap of
2410f2d40141SChristian Brauner * the vfsmount must be passed through @idmap. This function will then take
2411f2d40141SChristian Brauner * care to map the inode according to @idmap before checking permissions
241221cb47beSChristian Brauner * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
2413f2d40141SChristian Brauner * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
2414a1bd120dSDmitry Monakhov */
inode_init_owner(struct mnt_idmap * idmap,struct inode * inode,const struct inode * dir,umode_t mode)2415f2d40141SChristian Brauner void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
241621cb47beSChristian Brauner const struct inode *dir, umode_t mode)
2417a1bd120dSDmitry Monakhov {
2418c14329d3SChristian Brauner inode_fsuid_set(inode, idmap);
2419a1bd120dSDmitry Monakhov if (dir && dir->i_mode & S_ISGID) {
2420a1bd120dSDmitry Monakhov inode->i_gid = dir->i_gid;
24210fa3ecd8SLinus Torvalds
24220fa3ecd8SLinus Torvalds /* Directories are special, and always inherit S_ISGID */
2423a1bd120dSDmitry Monakhov if (S_ISDIR(mode))
2424a1bd120dSDmitry Monakhov mode |= S_ISGID;
2425a1bd120dSDmitry Monakhov } else
2426c14329d3SChristian Brauner inode_fsgid_set(inode, idmap);
2427a1bd120dSDmitry Monakhov inode->i_mode = mode;
2428a1bd120dSDmitry Monakhov }
2429a1bd120dSDmitry Monakhov EXPORT_SYMBOL(inode_init_owner);
2430e795b717SSerge E. Hallyn
24312e149670SSerge E. Hallyn /**
24322e149670SSerge E. Hallyn * inode_owner_or_capable - check current task permissions to inode
243301beba79SChristian Brauner * @idmap: idmap of the mount the inode was found from
24342e149670SSerge E. Hallyn * @inode: inode being checked
24352e149670SSerge E. Hallyn *
243623adbe12SAndy Lutomirski * Return true if current either has CAP_FOWNER in a namespace with the
243723adbe12SAndy Lutomirski * inode owner uid mapped, or owns the file.
243821cb47beSChristian Brauner *
243901beba79SChristian Brauner * If the inode has been found through an idmapped mount the idmap of
244001beba79SChristian Brauner * the vfsmount must be passed through @idmap. This function will then take
244101beba79SChristian Brauner * care to map the inode according to @idmap before checking permissions.
244221cb47beSChristian Brauner * On non-idmapped mounts or if permission checking is to be performed on the
244301beba79SChristian Brauner * raw inode simply passs @nop_mnt_idmap.
2444e795b717SSerge E. Hallyn */
inode_owner_or_capable(struct mnt_idmap * idmap,const struct inode * inode)244501beba79SChristian Brauner bool inode_owner_or_capable(struct mnt_idmap *idmap,
244621cb47beSChristian Brauner const struct inode *inode)
2447e795b717SSerge E. Hallyn {
2448a2bd096fSChristian Brauner vfsuid_t vfsuid;
244923adbe12SAndy Lutomirski struct user_namespace *ns;
245023adbe12SAndy Lutomirski
2451e67fe633SChristian Brauner vfsuid = i_uid_into_vfsuid(idmap, inode);
2452a2bd096fSChristian Brauner if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
2453e795b717SSerge E. Hallyn return true;
245423adbe12SAndy Lutomirski
245523adbe12SAndy Lutomirski ns = current_user_ns();
2456a2bd096fSChristian Brauner if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
2457e795b717SSerge E. Hallyn return true;
2458e795b717SSerge E. Hallyn return false;
2459e795b717SSerge E. Hallyn }
24602e149670SSerge E. Hallyn EXPORT_SYMBOL(inode_owner_or_capable);
24611d59d61fSTrond Myklebust
/*
 * Direct i/o helper functions
 */
/*
 * Sleep until inode->i_dio_count drops to zero.
 *
 * Uses the __I_DIO_WAKEUP bit waitqueue of i_state.  Note the ordering:
 * prepare_to_wait() is called before i_dio_count is (re-)checked, so a
 * wakeup issued between the check and schedule() is not lost; the loop
 * re-tests the counter after every wakeup because bit waitqueues are
 * shared and wakeups may be spurious.
 */
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wq_entry);
}
24771d59d61fSTrond Myklebust
24781d59d61fSTrond Myklebust /**
24791d59d61fSTrond Myklebust * inode_dio_wait - wait for outstanding DIO requests to finish
24801d59d61fSTrond Myklebust * @inode: inode to wait for
24811d59d61fSTrond Myklebust *
24821d59d61fSTrond Myklebust * Waits for all pending direct I/O requests to finish so that we can
24831d59d61fSTrond Myklebust * proceed with a truncate or equivalent operation.
24841d59d61fSTrond Myklebust *
24851d59d61fSTrond Myklebust * Must be called under a lock that serializes taking new references
24861d59d61fSTrond Myklebust * to i_dio_count, usually by inode->i_mutex.
24871d59d61fSTrond Myklebust */
inode_dio_wait(struct inode * inode)24881d59d61fSTrond Myklebust void inode_dio_wait(struct inode *inode)
24891d59d61fSTrond Myklebust {
24901d59d61fSTrond Myklebust if (atomic_read(&inode->i_dio_count))
24911d59d61fSTrond Myklebust __inode_dio_wait(inode);
24921d59d61fSTrond Myklebust }
24931d59d61fSTrond Myklebust EXPORT_SYMBOL(inode_dio_wait);
24941d59d61fSTrond Myklebust
/*
 * inode_set_flags - atomically set some inode flags
 *
 * Note: the caller should be holding i_mutex, or else be sure that
 * they have exclusive access to the inode structure (i.e., while the
 * inode is being instantiated). The reason for the cmpxchg() loop
 * --- which wouldn't be necessary if all code paths which modify
 * i_flags actually followed this rule, is that there is at least one
 * code path which doesn't today so we use cmpxchg() out of an abundance
 * of caution.
 *
 * In the long run, i_mutex is overkill, and we should probably look
 * at using the i_lock spinlock to protect i_flags, and then make sure
 * it is so documented in include/linux/fs.h and that all code follows
 * the locking convention!!
 */
void inode_set_flags(struct inode *inode, unsigned int flags,
		     unsigned int mask)
{
	/* Flags outside the mask would be silently dropped by the update. */
	WARN_ON_ONCE(flags & ~mask);
	/* Atomically replace the masked bits of i_flags with @flags. */
	set_mask_bits(&inode->i_flags, mask, flags);
}
EXPORT_SYMBOL(inode_set_flags);
251821fc61c7SAl Viro
/*
 * Force this inode's page cache allocations to use GFP_USER, i.e.
 * lowmem-only pages (GFP_USER does not include __GFP_HIGHMEM), so the
 * filesystem can access the pages via the kernel direct mapping.
 */
void inode_nohighmem(struct inode *inode)
{
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);
25243cd88666SDeepa Dinamani
25253cd88666SDeepa Dinamani /**
252650e17c00SDeepa Dinamani * timestamp_truncate - Truncate timespec to a granularity
252750e17c00SDeepa Dinamani * @t: Timespec
252850e17c00SDeepa Dinamani * @inode: inode being updated
252950e17c00SDeepa Dinamani *
253050e17c00SDeepa Dinamani * Truncate a timespec to the granularity supported by the fs
253150e17c00SDeepa Dinamani * containing the inode. Always rounds down. gran must
253250e17c00SDeepa Dinamani * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
253350e17c00SDeepa Dinamani */
timestamp_truncate(struct timespec64 t,struct inode * inode)253450e17c00SDeepa Dinamani struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
253550e17c00SDeepa Dinamani {
253650e17c00SDeepa Dinamani struct super_block *sb = inode->i_sb;
253750e17c00SDeepa Dinamani unsigned int gran = sb->s_time_gran;
253850e17c00SDeepa Dinamani
253950e17c00SDeepa Dinamani t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
254050e17c00SDeepa Dinamani if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
254150e17c00SDeepa Dinamani t.tv_nsec = 0;
254250e17c00SDeepa Dinamani
254350e17c00SDeepa Dinamani /* Avoid division in the common cases 1 ns and 1 s. */
254450e17c00SDeepa Dinamani if (gran == 1)
254550e17c00SDeepa Dinamani ; /* nothing */
254650e17c00SDeepa Dinamani else if (gran == NSEC_PER_SEC)
254750e17c00SDeepa Dinamani t.tv_nsec = 0;
254850e17c00SDeepa Dinamani else if (gran > 1 && gran < NSEC_PER_SEC)
254950e17c00SDeepa Dinamani t.tv_nsec -= t.tv_nsec % gran;
255050e17c00SDeepa Dinamani else
255150e17c00SDeepa Dinamani WARN(1, "invalid file time granularity: %u", gran);
255250e17c00SDeepa Dinamani return t;
255350e17c00SDeepa Dinamani }
255450e17c00SDeepa Dinamani EXPORT_SYMBOL(timestamp_truncate);
255550e17c00SDeepa Dinamani
255650e17c00SDeepa Dinamani /**
25573cd88666SDeepa Dinamani * current_time - Return FS time
25583cd88666SDeepa Dinamani * @inode: inode.
25593cd88666SDeepa Dinamani *
25603cd88666SDeepa Dinamani * Return the current time truncated to the time granularity supported by
25613cd88666SDeepa Dinamani * the fs.
25623cd88666SDeepa Dinamani *
25633cd88666SDeepa Dinamani * Note that inode and inode->sb cannot be NULL.
25643cd88666SDeepa Dinamani * Otherwise, the function warns and returns time without truncation.
25653cd88666SDeepa Dinamani */
current_time(struct inode * inode)256695582b00SDeepa Dinamani struct timespec64 current_time(struct inode *inode)
25673cd88666SDeepa Dinamani {
2568d651d160SArnd Bergmann struct timespec64 now;
2569d651d160SArnd Bergmann
2570d651d160SArnd Bergmann ktime_get_coarse_real_ts64(&now);
257150e17c00SDeepa Dinamani return timestamp_truncate(now, inode);
25723cd88666SDeepa Dinamani }
25733cd88666SDeepa Dinamani EXPORT_SYMBOL(current_time);
25742b3416ceSYang Xu
25752b3416ceSYang Xu /**
25769b6304c1SJeff Layton * inode_set_ctime_current - set the ctime to current_time
25779b6304c1SJeff Layton * @inode: inode
25789b6304c1SJeff Layton *
25799b6304c1SJeff Layton * Set the inode->i_ctime to the current value for the inode. Returns
25809b6304c1SJeff Layton * the current value that was assigned to i_ctime.
25819b6304c1SJeff Layton */
inode_set_ctime_current(struct inode * inode)25829b6304c1SJeff Layton struct timespec64 inode_set_ctime_current(struct inode *inode)
25839b6304c1SJeff Layton {
2584647aa768SChristian Brauner struct timespec64 now = current_time(inode);
25859b6304c1SJeff Layton
2586647aa768SChristian Brauner inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
25879b6304c1SJeff Layton return now;
25889b6304c1SJeff Layton }
25899b6304c1SJeff Layton EXPORT_SYMBOL(inode_set_ctime_current);
25909b6304c1SJeff Layton
25919b6304c1SJeff Layton /**
259211c2a870SChristian Brauner * in_group_or_capable - check whether caller is CAP_FSETID privileged
25939452e93eSChristian Brauner * @idmap: idmap of the mount @inode was found from
259411c2a870SChristian Brauner * @inode: inode to check
259511c2a870SChristian Brauner * @vfsgid: the new/current vfsgid of @inode
259611c2a870SChristian Brauner *
259711c2a870SChristian Brauner * Check wether @vfsgid is in the caller's group list or if the caller is
259811c2a870SChristian Brauner * privileged with CAP_FSETID over @inode. This can be used to determine
259911c2a870SChristian Brauner * whether the setgid bit can be kept or must be dropped.
260011c2a870SChristian Brauner *
260111c2a870SChristian Brauner * Return: true if the caller is sufficiently privileged, false if not.
260211c2a870SChristian Brauner */
in_group_or_capable(struct mnt_idmap * idmap,const struct inode * inode,vfsgid_t vfsgid)26039452e93eSChristian Brauner bool in_group_or_capable(struct mnt_idmap *idmap,
260411c2a870SChristian Brauner const struct inode *inode, vfsgid_t vfsgid)
260511c2a870SChristian Brauner {
260611c2a870SChristian Brauner if (vfsgid_in_group_p(vfsgid))
260711c2a870SChristian Brauner return true;
26089452e93eSChristian Brauner if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
260911c2a870SChristian Brauner return true;
261011c2a870SChristian Brauner return false;
261111c2a870SChristian Brauner }
261211c2a870SChristian Brauner
261311c2a870SChristian Brauner /**
26142b3416ceSYang Xu * mode_strip_sgid - handle the sgid bit for non-directories
26159452e93eSChristian Brauner * @idmap: idmap of the mount the inode was created from
26162b3416ceSYang Xu * @dir: parent directory inode
26172b3416ceSYang Xu * @mode: mode of the file to be created in @dir
26182b3416ceSYang Xu *
26192b3416ceSYang Xu * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
26202b3416ceSYang Xu * raised and @dir has the S_ISGID bit raised ensure that the caller is
26212b3416ceSYang Xu * either in the group of the parent directory or they have CAP_FSETID
26222b3416ceSYang Xu * in their user namespace and are privileged over the parent directory.
26232b3416ceSYang Xu * In all other cases, strip the S_ISGID bit from @mode.
26242b3416ceSYang Xu *
26252b3416ceSYang Xu * Return: the new mode to use for the file
26262b3416ceSYang Xu */
mode_strip_sgid(struct mnt_idmap * idmap,const struct inode * dir,umode_t mode)26279452e93eSChristian Brauner umode_t mode_strip_sgid(struct mnt_idmap *idmap,
26282b3416ceSYang Xu const struct inode *dir, umode_t mode)
26292b3416ceSYang Xu {
26302b3416ceSYang Xu if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
26312b3416ceSYang Xu return mode;
26322b3416ceSYang Xu if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
26332b3416ceSYang Xu return mode;
2634e67fe633SChristian Brauner if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
26352b3416ceSYang Xu return mode;
26362b3416ceSYang Xu return mode & ~S_ISGID;
26372b3416ceSYang Xu }
26382b3416ceSYang Xu EXPORT_SYMBOL(mode_strip_sgid);
2639