1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * (C) 1997 Linus Torvalds
44b4563dcSChristoph Hellwig * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
51da177e4SLinus Torvalds */
6e59cc473SAl Viro #include <linux/export.h>
71da177e4SLinus Torvalds #include <linux/fs.h>
85970e15dSJeff Layton #include <linux/filelock.h>
91da177e4SLinus Torvalds #include <linux/mm.h>
101da177e4SLinus Torvalds #include <linux/backing-dev.h>
111da177e4SLinus Torvalds #include <linux/hash.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/security.h>
141da177e4SLinus Torvalds #include <linux/cdev.h>
1557c8a661SMike Rapoport #include <linux/memblock.h>
163be25f49SEric Paris #include <linux/fsnotify.h>
17fc33a7bbSChristoph Hellwig #include <linux/mount.h>
18f19d4a8fSAl Viro #include <linux/posix_acl.h>
194b4563dcSChristoph Hellwig #include <linux/buffer_head.h> /* for inode_has_buffers */
207ada4db8SMiklos Szeredi #include <linux/ratelimit.h>
21bc3b14cbSDave Chinner #include <linux/list_lru.h>
22ae5e165dSJeff Layton #include <linux/iversion.h>
230ae45f63STheodore Ts'o #include <trace/events/writeback.h>
24a66979abSDave Chinner #include "internal.h"
251da177e4SLinus Torvalds
261da177e4SLinus Torvalds /*
274b4563dcSChristoph Hellwig * Inode locking rules:
28250df6edSDave Chinner *
29250df6edSDave Chinner * inode->i_lock protects:
3010e14073SJchao Sun * inode->i_state, inode->i_hash, __iget(), inode->i_io_list
31bc3b14cbSDave Chinner * Inode LRU list locks protect:
3298b745c6SDave Chinner * inode->i_sb->s_inode_lru, inode->i_lru
3374278da9SDave Chinner * inode->i_sb->s_inode_list_lock protects:
3474278da9SDave Chinner * inode->i_sb->s_inodes, inode->i_sb_list
35f758eeabSChristoph Hellwig * bdi->wb.list_lock protects:
36c7f54084SDave Chinner * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
3767a23c49SDave Chinner * inode_hash_lock protects:
3867a23c49SDave Chinner * inode_hashtable, inode->i_hash
39250df6edSDave Chinner *
40250df6edSDave Chinner * Lock ordering:
4155fa6091SDave Chinner *
4274278da9SDave Chinner * inode->i_sb->s_inode_list_lock
4355fa6091SDave Chinner * inode->i_lock
44bc3b14cbSDave Chinner * Inode LRU list locks
45a66979abSDave Chinner *
46f758eeabSChristoph Hellwig * bdi->wb.list_lock
47a66979abSDave Chinner * inode->i_lock
4867a23c49SDave Chinner *
4967a23c49SDave Chinner * inode_hash_lock
5074278da9SDave Chinner * inode->i_sb->s_inode_list_lock
5167a23c49SDave Chinner * inode->i_lock
5267a23c49SDave Chinner *
5367a23c49SDave Chinner * iunique_lock
5467a23c49SDave Chinner * inode_hash_lock
55250df6edSDave Chinner */
56250df6edSDave Chinner
57fa3536ccSEric Dumazet static unsigned int i_hash_mask __read_mostly;
58fa3536ccSEric Dumazet static unsigned int i_hash_shift __read_mostly;
5967a23c49SDave Chinner static struct hlist_head *inode_hashtable __read_mostly;
6067a23c49SDave Chinner static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
611da177e4SLinus Torvalds
621da177e4SLinus Torvalds /*
637dcda1c9SJens Axboe * Empty aops. Can be used for the cases where the user does not
647dcda1c9SJens Axboe * define any of the address_space operations.
657dcda1c9SJens Axboe */
667dcda1c9SJens Axboe const struct address_space_operations empty_aops = {
677dcda1c9SJens Axboe };
687dcda1c9SJens Axboe EXPORT_SYMBOL(empty_aops);
697dcda1c9SJens Axboe
703942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_inodes);
713942c07cSGlauber Costa static DEFINE_PER_CPU(unsigned long, nr_unused);
72cffbc8aaSDave Chinner
73e18b890bSChristoph Lameter static struct kmem_cache *inode_cachep __read_mostly;
741da177e4SLinus Torvalds
get_nr_inodes(void)753942c07cSGlauber Costa static long get_nr_inodes(void)
76cffbc8aaSDave Chinner {
773e880fb5SNick Piggin int i;
783942c07cSGlauber Costa long sum = 0;
793e880fb5SNick Piggin for_each_possible_cpu(i)
803e880fb5SNick Piggin sum += per_cpu(nr_inodes, i);
813e880fb5SNick Piggin return sum < 0 ? 0 : sum;
82cffbc8aaSDave Chinner }
83cffbc8aaSDave Chinner
get_nr_inodes_unused(void)843942c07cSGlauber Costa static inline long get_nr_inodes_unused(void)
85cffbc8aaSDave Chinner {
86fcb94f72SDave Chinner int i;
873942c07cSGlauber Costa long sum = 0;
88fcb94f72SDave Chinner for_each_possible_cpu(i)
89fcb94f72SDave Chinner sum += per_cpu(nr_unused, i);
90fcb94f72SDave Chinner return sum < 0 ? 0 : sum;
91cffbc8aaSDave Chinner }
92cffbc8aaSDave Chinner
/* not actually dirty inodes, but a wild approximation */
long get_nr_dirty_inodes(void)
{
	long approx = get_nr_inodes() - get_nr_inodes_unused();

	return approx < 0 ? 0 : approx;
}
99cffbc8aaSDave Chinner
100cffbc8aaSDave Chinner /*
101cffbc8aaSDave Chinner * Handle nr_inode sysctl
102cffbc8aaSDave Chinner */
103cffbc8aaSDave Chinner #ifdef CONFIG_SYSCTL
1041d67fe58SLuis Chamberlain /*
1051d67fe58SLuis Chamberlain * Statistics gathering..
1061d67fe58SLuis Chamberlain */
1071d67fe58SLuis Chamberlain static struct inodes_stat_t inodes_stat;
1081d67fe58SLuis Chamberlain
/*
 * sysctl handler for fs.inode-nr and fs.inode-state: refresh the
 * inodes_stat snapshot from the per-cpu counters, then hand off to the
 * generic unsigned-long vector handler to copy it to userspace.
 */
static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
1161d67fe58SLuis Chamberlain
1171d67fe58SLuis Chamberlain static struct ctl_table inodes_sysctls[] = {
1181d67fe58SLuis Chamberlain {
1191d67fe58SLuis Chamberlain .procname = "inode-nr",
1201d67fe58SLuis Chamberlain .data = &inodes_stat,
1211d67fe58SLuis Chamberlain .maxlen = 2*sizeof(long),
1221d67fe58SLuis Chamberlain .mode = 0444,
1231d67fe58SLuis Chamberlain .proc_handler = proc_nr_inodes,
1241d67fe58SLuis Chamberlain },
1251d67fe58SLuis Chamberlain {
1261d67fe58SLuis Chamberlain .procname = "inode-state",
1271d67fe58SLuis Chamberlain .data = &inodes_stat,
1281d67fe58SLuis Chamberlain .maxlen = 7*sizeof(long),
1291d67fe58SLuis Chamberlain .mode = 0444,
1301d67fe58SLuis Chamberlain .proc_handler = proc_nr_inodes,
1311d67fe58SLuis Chamberlain },
1321d67fe58SLuis Chamberlain { }
1331d67fe58SLuis Chamberlain };
1341d67fe58SLuis Chamberlain
/* Register the fs.inode-* sysctls early during boot. */
static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
141cffbc8aaSDave Chinner #endif
142cffbc8aaSDave Chinner
/*
 * Default ->open() used by inodes that never set a real file_operations:
 * refuse the open outright.
 */
static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}
147bd9b51e7SAl Viro
1482cb1599fSDavid Chinner /**
1496e7c2b4dSMasahiro Yamada * inode_init_always - perform inode structure initialisation
1500bc02f3fSRandy Dunlap * @sb: superblock inode belongs to
1510bc02f3fSRandy Dunlap * @inode: inode to initialise
1522cb1599fSDavid Chinner *
1532cb1599fSDavid Chinner * These are initializations that need to be done on every inode
1542cb1599fSDavid Chinner * allocation as the fields are not initialised by slab allocation.
1552cb1599fSDavid Chinner */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct inode_operations empty_iops;
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic64_set(&inode->i_sequence, 0);
	/* New inodes start with a single reference held by the caller. */
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	/* Lock classes come from the filesystem type so lockdep can
	 * distinguish e.g. ext4 inode locks from nfs inode locks. */
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	/* Initialise the embedded address_space (inode->i_data). */
	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	/* LSM blob allocation is the only step here that can fail. */
	if (unlikely(security_inode_alloc(inode)))
		return -ENOMEM;
	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always);
2392cb1599fSDavid Chinner
/*
 * Free an inode allocated from the common inode_cachep slab.  Callers
 * must ensure no RCU walkers can still reach the inode.
 */
void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);
245fdb0da89SAl Viro
i_callback(struct rcu_head * head)246fdb0da89SAl Viro static void i_callback(struct rcu_head *head)
247fdb0da89SAl Viro {
248fdb0da89SAl Viro struct inode *inode = container_of(head, struct inode, i_rcu);
249fdb0da89SAl Viro if (inode->free_inode)
250fdb0da89SAl Viro inode->free_inode(inode);
251fdb0da89SAl Viro else
252fdb0da89SAl Viro free_inode_nonrcu(inode);
253fdb0da89SAl Viro }
254fdb0da89SAl Viro
/*
 * Allocate and initialise a new inode for @sb, using the filesystem's
 * ->alloc_inode if provided, otherwise the common slab cache.  Returns
 * NULL on allocation or initialisation failure, undoing the allocation.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		/*
		 * Mirror the destroy_inode() teardown protocol: call
		 * ->destroy_inode if present, and only free via the RCU
		 * path when ->free_inode (or the common slab) must run.
		 */
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}
2811da177e4SLinus Torvalds
/*
 * Common teardown run for every inode before it is freed: detach
 * writeback, security and fsnotify state, release lock contexts and
 * cached POSIX ACLs, and drop the nr_inodes accounting.
 */
void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	/* An unlinked inode leaving the system drops s_remove_count. */
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
3032e00c97eSChristoph Hellwig
/*
 * Fully destroy an inode: run common teardown, give the filesystem its
 * ->destroy_inode callback, then free the memory after an RCU grace
 * period unless the filesystem handles freeing itself (no ->free_inode).
 */
static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		if (!ops->free_inode)
			return;
	}
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}
3181da177e4SLinus Torvalds
3197ada4db8SMiklos Szeredi /**
3207ada4db8SMiklos Szeredi * drop_nlink - directly drop an inode's link count
3217ada4db8SMiklos Szeredi * @inode: inode
3227ada4db8SMiklos Szeredi *
3237ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3247ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. In cases
3257ada4db8SMiklos Szeredi * where we are attempting to track writes to the
3267ada4db8SMiklos Szeredi * filesystem, a decrement to zero means an imminent
3277ada4db8SMiklos Szeredi * write when the file is truncated and actually unlinked
3287ada4db8SMiklos Szeredi * on the filesystem.
3297ada4db8SMiklos Szeredi */
drop_nlink(struct inode * inode)3307ada4db8SMiklos Szeredi void drop_nlink(struct inode *inode)
3317ada4db8SMiklos Szeredi {
3327ada4db8SMiklos Szeredi WARN_ON(inode->i_nlink == 0);
3337ada4db8SMiklos Szeredi inode->__i_nlink--;
3347ada4db8SMiklos Szeredi if (!inode->i_nlink)
3357ada4db8SMiklos Szeredi atomic_long_inc(&inode->i_sb->s_remove_count);
3367ada4db8SMiklos Szeredi }
3377ada4db8SMiklos Szeredi EXPORT_SYMBOL(drop_nlink);
3387ada4db8SMiklos Szeredi
3397ada4db8SMiklos Szeredi /**
3407ada4db8SMiklos Szeredi * clear_nlink - directly zero an inode's link count
3417ada4db8SMiklos Szeredi * @inode: inode
3427ada4db8SMiklos Szeredi *
3437ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3447ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. See
3457ada4db8SMiklos Szeredi * drop_nlink() for why we care about i_nlink hitting zero.
3467ada4db8SMiklos Szeredi */
clear_nlink(struct inode * inode)3477ada4db8SMiklos Szeredi void clear_nlink(struct inode *inode)
3487ada4db8SMiklos Szeredi {
3497ada4db8SMiklos Szeredi if (inode->i_nlink) {
3507ada4db8SMiklos Szeredi inode->__i_nlink = 0;
3517ada4db8SMiklos Szeredi atomic_long_inc(&inode->i_sb->s_remove_count);
3527ada4db8SMiklos Szeredi }
3537ada4db8SMiklos Szeredi }
3547ada4db8SMiklos Szeredi EXPORT_SYMBOL(clear_nlink);
3557ada4db8SMiklos Szeredi
3567ada4db8SMiklos Szeredi /**
3577ada4db8SMiklos Szeredi * set_nlink - directly set an inode's link count
3587ada4db8SMiklos Szeredi * @inode: inode
3597ada4db8SMiklos Szeredi * @nlink: new nlink (should be non-zero)
3607ada4db8SMiklos Szeredi *
3617ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3627ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink.
3637ada4db8SMiklos Szeredi */
set_nlink(struct inode * inode,unsigned int nlink)3647ada4db8SMiklos Szeredi void set_nlink(struct inode *inode, unsigned int nlink)
3657ada4db8SMiklos Szeredi {
3667ada4db8SMiklos Szeredi if (!nlink) {
3677ada4db8SMiklos Szeredi clear_nlink(inode);
3687ada4db8SMiklos Szeredi } else {
3697ada4db8SMiklos Szeredi /* Yes, some filesystems do change nlink from zero to one */
3707ada4db8SMiklos Szeredi if (inode->i_nlink == 0)
3717ada4db8SMiklos Szeredi atomic_long_dec(&inode->i_sb->s_remove_count);
3727ada4db8SMiklos Szeredi
3737ada4db8SMiklos Szeredi inode->__i_nlink = nlink;
3747ada4db8SMiklos Szeredi }
3757ada4db8SMiklos Szeredi }
3767ada4db8SMiklos Szeredi EXPORT_SYMBOL(set_nlink);
3777ada4db8SMiklos Szeredi
3787ada4db8SMiklos Szeredi /**
3797ada4db8SMiklos Szeredi * inc_nlink - directly increment an inode's link count
3807ada4db8SMiklos Szeredi * @inode: inode
3817ada4db8SMiklos Szeredi *
3827ada4db8SMiklos Szeredi * This is a low-level filesystem helper to replace any
3837ada4db8SMiklos Szeredi * direct filesystem manipulation of i_nlink. Currently,
3847ada4db8SMiklos Szeredi * it is only here for parity with dec_nlink().
3857ada4db8SMiklos Szeredi */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		/* Going 0 -> 1 is only legitimate for I_LINKABLE inodes. */
		WARN_ON(!(inode->i_state & I_LINKABLE));
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);
3967ada4db8SMiklos Szeredi
/*
 * One-time initialisation of an address_space's embedded structures
 * (page cache xarray, mmap tree and locks).  Shared by the slab
 * constructor path and address_space_init_once().
 */
static void __address_space_init_once(struct address_space *mapping)
{
	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
	init_rwsem(&mapping->i_mmap_rwsem);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	mapping->i_mmap = RB_ROOT_CACHED;
}
405ae23395dSDave Chinner
/*
 * Initialise a standalone address_space from scratch: zero everything,
 * then perform the one-time structural initialisation.
 */
void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);
4122aa15890SMiklos Szeredi
4131da177e4SLinus Torvalds /*
4141da177e4SLinus Torvalds * These are initializations that only need to be done
4151da177e4SLinus Torvalds * once, because the fields are idempotent across use
4161da177e4SLinus Torvalds * of the inode, so let the slab aware of that.
4171da177e4SLinus Torvalds */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_LIST_HEAD(&inode->i_sb_list);
	__address_space_init_once(&inode->i_data);
	/* i_size is ordered separately for lockless i_size_read(). */
	i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
4311da177e4SLinus Torvalds
/* Slab constructor: adapt the void * object pointer to inode_init_once(). */
static void init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
4381da177e4SLinus Torvalds
4391da177e4SLinus Torvalds /*
440250df6edSDave Chinner * inode->i_lock must be held
4411da177e4SLinus Torvalds */
/* Take an extra reference; caller holds inode->i_lock (see above). */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}
4461da177e4SLinus Torvalds
4477de9c6eeSAl Viro /*
4487de9c6eeSAl Viro * get additional reference to inode; caller must already hold one.
4497de9c6eeSAl Viro */
void ihold(struct inode *inode)
{
	/* Result < 2 means the caller lied about already holding a ref. */
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);
4557de9c6eeSAl Viro
/*
 * Put an unused, clean inode on its superblock's LRU list.  Each guard
 * rejects inodes that must not be reclaimed via the LRU: dirty or
 * in-teardown state, live references, inactive superblocks, or mappings
 * the shrinker cannot usefully reclaim.  With @rotate, an inode already
 * on the LRU is instead marked I_REFERENCED to delay its eviction.
 */
static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
		return;
	if (atomic_read(&inode->i_count))
		return;
	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		return;
	if (!mapping_shrinkable(&inode->i_data))
		return;

	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}
4729e38d86fSNick Piggin
4734eff96ddSJan Kara /*
4744eff96ddSJan Kara * Add inode to LRU if needed (inode is unused and clean).
4754eff96ddSJan Kara *
4764eff96ddSJan Kara * Needs inode->i_lock held.
4774eff96ddSJan Kara */
inode_add_lru(struct inode * inode)4784eff96ddSJan Kara void inode_add_lru(struct inode *inode)
4794eff96ddSJan Kara {
48051b8c1feSJohannes Weiner __inode_add_lru(inode, false);
4814eff96ddSJan Kara }
4824eff96ddSJan Kara
/* Remove the inode from its superblock LRU, keeping nr_unused in sync. */
static void inode_lru_list_del(struct inode *inode)
{
	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}
4881da177e4SLinus Torvalds
489646ec461SChristoph Hellwig /**
490646ec461SChristoph Hellwig * inode_sb_list_add - add inode to the superblock list of inodes
491646ec461SChristoph Hellwig * @inode: inode to add
492646ec461SChristoph Hellwig */
inode_sb_list_add(struct inode * inode)493646ec461SChristoph Hellwig void inode_sb_list_add(struct inode *inode)
494646ec461SChristoph Hellwig {
49574278da9SDave Chinner spin_lock(&inode->i_sb->s_inode_list_lock);
49655fa6091SDave Chinner list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
49774278da9SDave Chinner spin_unlock(&inode->i_sb->s_inode_list_lock);
498646ec461SChristoph Hellwig }
499646ec461SChristoph Hellwig EXPORT_SYMBOL_GPL(inode_sb_list_add);
500646ec461SChristoph Hellwig
/*
 * Unlink the inode from its superblock's s_inodes list.  The unlocked
 * list_empty() check avoids taking s_inode_list_lock for inodes that
 * were never added.
 */
static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}
509646ec461SChristoph Hellwig
hash(struct super_block * sb,unsigned long hashval)5104c51acbcSDave Chinner static unsigned long hash(struct super_block *sb, unsigned long hashval)
5114c51acbcSDave Chinner {
5124c51acbcSDave Chinner unsigned long tmp;
5134c51acbcSDave Chinner
5144c51acbcSDave Chinner tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
5154c51acbcSDave Chinner L1_CACHE_BYTES;
5164b4563dcSChristoph Hellwig tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
5174b4563dcSChristoph Hellwig return tmp & i_hash_mask;
5184c51acbcSDave Chinner }
5194c51acbcSDave Chinner
5204c51acbcSDave Chinner /**
5214c51acbcSDave Chinner * __insert_inode_hash - hash an inode
5224c51acbcSDave Chinner * @inode: unhashed inode
5234c51acbcSDave Chinner * @hashval: unsigned long value used to locate this object in the
5244c51acbcSDave Chinner * inode_hashtable.
5254c51acbcSDave Chinner *
5264c51acbcSDave Chinner * Add an inode to the inode hash for this superblock.
5274c51acbcSDave Chinner */
__insert_inode_hash(struct inode * inode,unsigned long hashval)5284c51acbcSDave Chinner void __insert_inode_hash(struct inode *inode, unsigned long hashval)
5294c51acbcSDave Chinner {
530646ec461SChristoph Hellwig struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
531646ec461SChristoph Hellwig
53267a23c49SDave Chinner spin_lock(&inode_hash_lock);
533250df6edSDave Chinner spin_lock(&inode->i_lock);
5343f19b2abSDavid Howells hlist_add_head_rcu(&inode->i_hash, b);
535250df6edSDave Chinner spin_unlock(&inode->i_lock);
53667a23c49SDave Chinner spin_unlock(&inode_hash_lock);
5374c51acbcSDave Chinner }
5384c51acbcSDave Chinner EXPORT_SYMBOL(__insert_inode_hash);
5394c51acbcSDave Chinner
5404c51acbcSDave Chinner /**
541f2ee7abfSEric Dumazet * __remove_inode_hash - remove an inode from the hash
5424c51acbcSDave Chinner * @inode: inode to unhash
5434c51acbcSDave Chinner *
5444c51acbcSDave Chinner * Remove an inode from the superblock.
5454c51acbcSDave Chinner */
__remove_inode_hash(struct inode * inode)546f2ee7abfSEric Dumazet void __remove_inode_hash(struct inode *inode)
5474c51acbcSDave Chinner {
54867a23c49SDave Chinner spin_lock(&inode_hash_lock);
549250df6edSDave Chinner spin_lock(&inode->i_lock);
5503f19b2abSDavid Howells hlist_del_init_rcu(&inode->i_hash);
551250df6edSDave Chinner spin_unlock(&inode->i_lock);
55267a23c49SDave Chinner spin_unlock(&inode_hash_lock);
5534c51acbcSDave Chinner }
554f2ee7abfSEric Dumazet EXPORT_SYMBOL(__remove_inode_hash);
5554c51acbcSDave Chinner
/*
 * dump_mapping - best-effort diagnostic dump of an address_space.
 *
 * Used from debugging paths where @mapping may be a stale or corrupt
 * pointer, so every dereference goes through get_kernel_nofault() and
 * the function prints as much as it could safely read before bailing.
 */
void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything depending on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	/* Copy the first aliasing dentry into a local before printing it. */
	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr)) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	/*
	 * if dentry is corrupted, the %pd handler may still crash,
	 * but it's unlikely that we reach here with a corrupt mapping
	 */
	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
}
6043e9d80a8SMatthew Wilcox (Oracle)
/*
 * clear_inode - final sanity checks and state transition for an inode
 * being freed.  Caller must have set I_FREEING; on return the inode is
 * marked I_FREEING | I_CLEAR and must not be touched again except to
 * free it.
 */
void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __filemap_remove_folio())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * two known and long-standing ways in which nodes may get left behind
	 * (when deep radix-tree node allocation failed partway; or when THP
	 * collapse_file() failed). Until those two known cases are cleaned up,
	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
	 * nor even WARN_ON(!mapping_empty).
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
631b0683aa6SAl Viro
/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to. We remove any pages still attached to the inode and wait for any IO that
 * is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the lists
 * (e.g. writeback_single_inode). The caller is responsible for setting this.
 *
 * An inode must already be removed from the LRU list before being evicted from
 * the cache. This should occur atomically with setting the I_FREEING state
 * flag, so no inodes here should ever be on the LRU when being evicted.
 */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	/* Detach from the writeback and superblock inode lists first. */
	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode. We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);

	/* Let the filesystem tear down its state, or do the generic teardown. */
	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/*
	 * Wake up waiters on __I_NEW (see __wait_on_freeing_inode()); after
	 * the unhash above, hash lookups can no longer find this inode.
	 */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}
683b4272d4cSAl Viro
6841da177e4SLinus Torvalds /*
6851da177e4SLinus Torvalds * dispose_list - dispose of the contents of a local list
6861da177e4SLinus Torvalds * @head: the head of the list to free
6871da177e4SLinus Torvalds *
6881da177e4SLinus Torvalds * Dispose-list gets a local list with local inodes in it, so it doesn't
6891da177e4SLinus Torvalds * need to worry about list corruption and SMP locks.
6901da177e4SLinus Torvalds */
dispose_list(struct list_head * head)6911da177e4SLinus Torvalds static void dispose_list(struct list_head *head)
6921da177e4SLinus Torvalds {
6931da177e4SLinus Torvalds while (!list_empty(head)) {
6941da177e4SLinus Torvalds struct inode *inode;
6951da177e4SLinus Torvalds
6967ccf19a8SNick Piggin inode = list_first_entry(head, struct inode, i_lru);
6977ccf19a8SNick Piggin list_del_init(&inode->i_lru);
6981da177e4SLinus Torvalds
699644da596SAl Viro evict(inode);
700ac05fbb4SJosef Bacik cond_resched();
7011da177e4SLinus Torvalds }
7021da177e4SLinus Torvalds }
7031da177e4SLinus Torvalds
/**
 * evict_inodes - evict all evictable inodes for a superblock
 * @sb: superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained. This is
 * called by superblock shutdown after having SB_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		/* Unlocked check; in-use inodes are skipped (see above). */
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		/* Skip inodes already being set up or already being freed. */
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);
75263997e98SAl Viro
/**
 * invalidate_inodes - attempt to free all inodes on a superblock
 * @sb: superblock to operate on
 *
 * Attempts to free all inodes (including dirty inodes) for a given superblock.
 */
void invalidate_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		/* Skip inodes already being set up or already being freed. */
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* In-use inodes cannot be invalidated. */
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		/*
		 * Periodically drop the list lock and drain the dispose list
		 * so we neither hold the lock too long nor grow the local
		 * list without bound.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
7921da177e4SLinus Torvalds
/*
 * Isolate the inode from the LRU in preparation for freeing it.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 *
 * Returns one of the LRU_* status codes telling list_lru_walk what to do
 * with this entry (remove, rotate, skip, or retry the walk).
 */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * On highmem systems, mapping_shrinkable() permits dropping
	 * page cache in order to free up struct inodes: lowmem might
	 * be under pressure before the cache inside the highmem zone.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		/* Pin the inode, drop both locks, then reclaim its cache. */
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			mm_account_reclaimed_pages(reap);
		}
		iput(inode);
		spin_lock(lru_lock);
		/* The LRU may have changed while unlocked: restart the walk. */
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}
870bc3b14cbSDave Chinner
871bc3b14cbSDave Chinner /*
872bc3b14cbSDave Chinner * Walk the superblock inode LRU for freeable inodes and attempt to free them.
873bc3b14cbSDave Chinner * This is called from the superblock shrinker function with a number of inodes
874bc3b14cbSDave Chinner * to trim from the LRU. Inodes to be freed are moved to a temporary list and
875bc3b14cbSDave Chinner * then are freed outside inode_lock by dispose_list().
876bc3b14cbSDave Chinner */
prune_icache_sb(struct super_block * sb,struct shrink_control * sc)877503c358cSVladimir Davydov long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
878bc3b14cbSDave Chinner {
879bc3b14cbSDave Chinner LIST_HEAD(freeable);
880bc3b14cbSDave Chinner long freed;
881bc3b14cbSDave Chinner
882503c358cSVladimir Davydov freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
883503c358cSVladimir Davydov inode_lru_isolate, &freeable);
8841da177e4SLinus Torvalds dispose_list(&freeable);
8850a234c6dSDave Chinner return freed;
8861da177e4SLinus Torvalds }
8871da177e4SLinus Torvalds
8881da177e4SLinus Torvalds static void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode lock held.
 *
 * Walk one hash chain looking for an inode of @sb matching @test/@data.
 * Returns the inode with an elevated refcount, ERR_PTR(-ESTALE) if the
 * matching inode is still being created, or NULL if there is no match.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is going away; wait and rescan the chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9201da177e4SLinus Torvalds
/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 *
 * Same contract as find_inode(), but matches on the inode number directly
 * instead of calling a filesystem-supplied @test callback.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			/* Inode is going away; wait and rescan the chain. */
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		return inode;
	}
	return NULL;
}
9511da177e4SLinus Torvalds
952f991bd2eSEric Dumazet /*
953f991bd2eSEric Dumazet * Each cpu owns a range of LAST_INO_BATCH numbers.
954f991bd2eSEric Dumazet * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
955f991bd2eSEric Dumazet * to renew the exhausted range.
9568290c35fSDavid Chinner *
957f991bd2eSEric Dumazet * This does not significantly increase overflow rate because every CPU can
958f991bd2eSEric Dumazet * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
959f991bd2eSEric Dumazet * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
960f991bd2eSEric Dumazet * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
961f991bd2eSEric Dumazet * overflow rate by 2x, which does not seem too significant.
962f991bd2eSEric Dumazet *
963f991bd2eSEric Dumazet * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
964f991bd2eSEric Dumazet * error if st_ino won't fit in target struct field. Use 32bit counter
965f991bd2eSEric Dumazet * here to attempt to avoid that.
9668290c35fSDavid Chinner */
967f991bd2eSEric Dumazet #define LAST_INO_BATCH 1024
968f991bd2eSEric Dumazet static DEFINE_PER_CPU(unsigned int, last_ino);
9698290c35fSDavid Chinner
get_next_ino(void)97085fe4025SChristoph Hellwig unsigned int get_next_ino(void)
971f991bd2eSEric Dumazet {
972f991bd2eSEric Dumazet unsigned int *p = &get_cpu_var(last_ino);
973f991bd2eSEric Dumazet unsigned int res = *p;
974f991bd2eSEric Dumazet
975f991bd2eSEric Dumazet #ifdef CONFIG_SMP
976f991bd2eSEric Dumazet if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
977f991bd2eSEric Dumazet static atomic_t shared_last_ino;
978f991bd2eSEric Dumazet int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
979f991bd2eSEric Dumazet
980f991bd2eSEric Dumazet res = next - LAST_INO_BATCH;
9818290c35fSDavid Chinner }
982f991bd2eSEric Dumazet #endif
983f991bd2eSEric Dumazet
9842adc376cSCarlos Maiolino res++;
9852adc376cSCarlos Maiolino /* get_next_ino should not provide a 0 inode number */
9862adc376cSCarlos Maiolino if (unlikely(!res))
9872adc376cSCarlos Maiolino res++;
9882adc376cSCarlos Maiolino *p = res;
989f991bd2eSEric Dumazet put_cpu_var(last_ino);
990f991bd2eSEric Dumazet return res;
991f991bd2eSEric Dumazet }
99285fe4025SChristoph Hellwig EXPORT_SYMBOL(get_next_ino);
9938290c35fSDavid Chinner
9941da177e4SLinus Torvalds /**
995a209dfc7SEric Dumazet * new_inode_pseudo - obtain an inode
996a209dfc7SEric Dumazet * @sb: superblock
997a209dfc7SEric Dumazet *
998a209dfc7SEric Dumazet * Allocates a new inode for given superblock.
999a209dfc7SEric Dumazet * Inode wont be chained in superblock s_inodes list
1000a209dfc7SEric Dumazet * This means :
1001a209dfc7SEric Dumazet * - fs can't be unmount
1002a209dfc7SEric Dumazet * - quotas, fsnotify, writeback can't work
1003a209dfc7SEric Dumazet */
new_inode_pseudo(struct super_block * sb)1004a209dfc7SEric Dumazet struct inode *new_inode_pseudo(struct super_block *sb)
1005a209dfc7SEric Dumazet {
1006a209dfc7SEric Dumazet struct inode *inode = alloc_inode(sb);
1007a209dfc7SEric Dumazet
1008a209dfc7SEric Dumazet if (inode) {
1009a209dfc7SEric Dumazet spin_lock(&inode->i_lock);
1010a209dfc7SEric Dumazet inode->i_state = 0;
1011a209dfc7SEric Dumazet spin_unlock(&inode->i_lock);
1012a209dfc7SEric Dumazet }
1013a209dfc7SEric Dumazet return inode;
1014a209dfc7SEric Dumazet }
1015a209dfc7SEric Dumazet
1016a209dfc7SEric Dumazet /**
10171da177e4SLinus Torvalds * new_inode - obtain an inode
10181da177e4SLinus Torvalds * @sb: superblock
10191da177e4SLinus Torvalds *
1020769848c0SMel Gorman * Allocates a new inode for given superblock. The default gfp_mask
10213c1d4378SHugh Dickins * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
1022769848c0SMel Gorman * If HIGHMEM pages are unsuitable or it is known that pages allocated
1023769848c0SMel Gorman * for the page cache are not reclaimable or migratable,
1024769848c0SMel Gorman * mapping_set_gfp_mask() must be called with suitable flags on the
1025769848c0SMel Gorman * newly created inode's mapping
1026769848c0SMel Gorman *
10271da177e4SLinus Torvalds */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode = new_inode_pseudo(sb);

	/* Unlike the pseudo variant, hook the inode into sb->s_inodes. */
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);
10381da177e4SLinus Torvalds
103914358e6dSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_annotate_inode_mutex_key(struct inode * inode)1040e096d0c7SJosh Boyer void lockdep_annotate_inode_mutex_key(struct inode *inode)
1041e096d0c7SJosh Boyer {
1042a3314a0eSNamhyung Kim if (S_ISDIR(inode->i_mode)) {
104314358e6dSPeter Zijlstra struct file_system_type *type = inode->i_sb->s_type;
10441e89a5e1SPeter Zijlstra
10459a7aa12fSJan Kara /* Set new key only if filesystem hasn't already changed it */
10469902af79SAl Viro if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
104714358e6dSPeter Zijlstra /*
104814358e6dSPeter Zijlstra * ensure nobody is actually holding i_mutex
104914358e6dSPeter Zijlstra */
10509902af79SAl Viro // mutex_destroy(&inode->i_mutex);
10519902af79SAl Viro init_rwsem(&inode->i_rwsem);
10529902af79SAl Viro lockdep_set_class(&inode->i_rwsem,
10539a7aa12fSJan Kara &type->i_mutex_dir_key);
10549a7aa12fSJan Kara }
10551e89a5e1SPeter Zijlstra }
1056e096d0c7SJosh Boyer }
1057e096d0c7SJosh Boyer EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
105814358e6dSPeter Zijlstra #endif
1059e096d0c7SJosh Boyer
/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:	new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/* Make the state change visible before waking __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
10781da177e4SLinus Torvalds
/**
 * discard_new_inode - drop a partially-initialised I_NEW inode
 * @inode: inode whose setup failed
 *
 * Like unlock_new_inode(), but then drops the reference via iput() instead
 * of handing the inode back to the caller.  Note that, unlike
 * unlock_new_inode(), this leaves I_CREATING set.
 */
void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/* Make the state change visible before waking __I_NEW waiters. */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
1091c2b6d621SAl Viro
/**
 * lock_two_inodes - lock two inodes (may be regular files but also dirs)
 *
 * Lock any non-NULL argument. The caller must make sure that if he is passing
 * in two directories, one is not ancestor of the other. Zero, one or two
 * objects may be locked by this function.
 *
 * @inode1: first inode to lock
 * @inode2: second inode to lock
 * @subclass1: inode lock subclass for the first lock obtained
 * @subclass2: inode lock subclass for the second lock obtained
 */
void lock_two_inodes(struct inode *inode1, struct inode *inode2,
		     unsigned subclass1, unsigned subclass2)
{
	if (!inode1 || !inode2) {
		/*
		 * Make sure @subclass1 will be used for the acquired lock.
		 * This is not strictly necessary (no current caller cares) but
		 * let's keep things consistent.
		 */
		if (!inode1)
			swap(inode1, inode2);
		goto lock;
	}

	/*
	 * If one object is directory and the other is not, we must make sure
	 * to lock directory first as the other object may be its child.
	 */
	if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
		/* Same kind: order by address to get a stable lock order. */
		if (inode1 > inode2)
			swap(inode1, inode2);
	} else if (!S_ISDIR(inode1->i_mode))
		swap(inode1, inode2);
lock:
	if (inode1)
		inode_lock_nested(inode1, subclass1);
	if (inode2 && inode2 != inode1)
		inode_lock_nested(inode2, subclass2);
}
1133f23ce757SJan Kara
/**
 * lock_two_nondirectories - take two i_mutexes on non-directory objects
 *
 * Lock any non-NULL argument. Passed objects must not be directories.
 * Zero, one or two objects may be locked by this function.
 *
 * @inode1: first inode to lock
 * @inode2: second inode to lock
 */
void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	/* Directories must go through lock_two_inodes() with proper classes. */
	if (inode1)
		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
	if (inode2)
		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
	lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
1152375e289eSJ. Bruce Fields
/**
 * unlock_two_nondirectories - release locks from lock_two_nondirectories()
 * @inode1: first inode to unlock
 * @inode2: second inode to unlock
 */
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
	if (inode1) {
		WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
		inode_unlock(inode1);
	}
	/* Guard against both arguments naming the same inode. */
	if (inode2 && inode2 != inode1) {
		WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
		inode_unlock(inode2);
	}
}
EXPORT_SYMBOL(unlock_two_nondirectories);
1170375e289eSJ. Bruce Fields
/**
 * inode_insert5 - obtain an inode from a mounted file system
 * @inode: pre-allocated inode to use for insert to cache
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count. This is a
 * variant of iget5_locked() for callers that don't want to fail on memory
 * allocation of inode.
 *
 * If the inode is not in cache, insert the pre-allocated inode to cache and
 * return it locked, hashed, and with the I_NEW flag set. The file system gets
 * to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so can't
 * sleep.
 */
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
			    int (*test)(struct inode *, void *),
			    int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	struct inode *old;

again:
	spin_lock(&inode_hash_lock);
	old = find_inode(inode->i_sb, head, test, data);
	if (unlikely(old)) {
		/*
		 * Uhhuh, somebody else created the same inode under us.
		 * Use the old inode instead of the preallocated one.
		 */
		spin_unlock(&inode_hash_lock);
		/*
		 * An error pointer from find_inode() means the lookup must
		 * fail; report that to the caller as NULL.
		 */
		if (IS_ERR(old))
			return NULL;
		wait_on_inode(old);
		if (unlikely(inode_unhashed(old))) {
			/* The old inode was evicted while we waited: retry. */
			iput(old);
			goto again;
		}
		return old;
	}

	/* @set failed: signal the failure by returning NULL. */
	if (set && unlikely(set(inode, data))) {
		inode = NULL;
		goto unlock;
	}

	/*
	 * Return the locked inode with I_NEW set, the
	 * caller is responsible for filling in the contents
	 */
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head_rcu(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);

	/*
	 * Add inode to the sb list if it's not already. It has I_NEW at this
	 * point, so it should be safe to test i_sb_list locklessly.
	 */
	if (list_empty(&inode->i_sb_list))
		inode_sb_list_add(inode);
unlock:
	spin_unlock(&inode_hash_lock);

	return inode;
}
EXPORT_SYMBOL(inode_insert5);
124380ea09a0SMiklos Szeredi
124480ea09a0SMiklos Szeredi /**
12450b2d0724SChristoph Hellwig * iget5_locked - obtain an inode from a mounted file system
12460b2d0724SChristoph Hellwig * @sb: super block of file system
12470b2d0724SChristoph Hellwig * @hashval: hash value (usually inode number) to get
12480b2d0724SChristoph Hellwig * @test: callback used for comparisons between inodes
12490b2d0724SChristoph Hellwig * @set: callback used to initialize a new struct inode
12500b2d0724SChristoph Hellwig * @data: opaque data pointer to pass to @test and @set
12511da177e4SLinus Torvalds *
12520b2d0724SChristoph Hellwig * Search for the inode specified by @hashval and @data in the inode cache,
12530b2d0724SChristoph Hellwig * and if present it is return it with an increased reference count. This is
12540b2d0724SChristoph Hellwig * a generalized version of iget_locked() for file systems where the inode
12550b2d0724SChristoph Hellwig * number is not sufficient for unique identification of an inode.
12560b2d0724SChristoph Hellwig *
12570b2d0724SChristoph Hellwig * If the inode is not in cache, allocate a new inode and return it locked,
12580b2d0724SChristoph Hellwig * hashed, and with the I_NEW flag set. The file system gets to fill it in
12590b2d0724SChristoph Hellwig * before unlocking it via unlock_new_inode().
12600b2d0724SChristoph Hellwig *
12610b2d0724SChristoph Hellwig * Note both @test and @set are called with the inode_hash_lock held, so can't
12620b2d0724SChristoph Hellwig * sleep.
12631da177e4SLinus Torvalds */
iget5_locked(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),int (* set)(struct inode *,void *),void * data)12640b2d0724SChristoph Hellwig struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
12656b3304b5SManish Katiyar int (*test)(struct inode *, void *),
12660b2d0724SChristoph Hellwig int (*set)(struct inode *, void *), void *data)
12671da177e4SLinus Torvalds {
126880ea09a0SMiklos Szeredi struct inode *inode = ilookup5(sb, hashval, test, data);
12690b2d0724SChristoph Hellwig
127080ea09a0SMiklos Szeredi if (!inode) {
1271e950564bSMiklos Szeredi struct inode *new = alloc_inode(sb);
12720b2d0724SChristoph Hellwig
127380ea09a0SMiklos Szeredi if (new) {
1274e950564bSMiklos Szeredi new->i_state = 0;
127580ea09a0SMiklos Szeredi inode = inode_insert5(new, hashval, test, set, data);
127680ea09a0SMiklos Szeredi if (unlikely(inode != new))
1277e950564bSMiklos Szeredi destroy_inode(new);
12782864f301SAl Viro }
12791da177e4SLinus Torvalds }
12801da177e4SLinus Torvalds return inode;
12811da177e4SLinus Torvalds }
12820b2d0724SChristoph Hellwig EXPORT_SYMBOL(iget5_locked);
12831da177e4SLinus Torvalds
/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count. This is for file systems
 * where the inode number is sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set. The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;
again:
	/* Fast path: look the inode up without allocating anything. */
	spin_lock(&inode_hash_lock);
	inode = find_inode_fast(sb, head, ino);
	spin_unlock(&inode_hash_lock);
	if (inode) {
		/*
		 * An error pointer from find_inode_fast() means this lookup
		 * must fail; report it to the caller as NULL.
		 */
		if (IS_ERR(inode))
			return NULL;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			/* Evicted while we waited on I_NEW: retry. */
			iput(inode);
			goto again;
		}
		return inode;
	}

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_hash_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			spin_lock(&inode->i_lock);
			inode->i_state = I_NEW;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			inode_sb_list_add(inode);
			spin_unlock(&inode_hash_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_hash_lock);
		destroy_inode(inode);
		if (IS_ERR(old))
			return NULL;
		inode = old;
		wait_on_inode(inode);
		if (unlikely(inode_unhashed(inode))) {
			/* The raced-in inode went away too: start over. */
			iput(inode);
			goto again;
		}
	}
	return inode;
}
EXPORT_SYMBOL(iget_locked);
13571da177e4SLinus Torvalds
1358ad5e195aSChristoph Hellwig /*
1359ad5e195aSChristoph Hellwig * search the inode cache for a matching inode number.
1360ad5e195aSChristoph Hellwig * If we find one, then the inode number we are trying to
1361ad5e195aSChristoph Hellwig * allocate is not unique and so we should not use it.
1362ad5e195aSChristoph Hellwig *
1363ad5e195aSChristoph Hellwig * Returns 1 if the inode number is unique, 0 if it is not.
1364ad5e195aSChristoph Hellwig */
test_inode_iunique(struct super_block * sb,unsigned long ino)1365ad5e195aSChristoph Hellwig static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1366ad5e195aSChristoph Hellwig {
1367ad5e195aSChristoph Hellwig struct hlist_head *b = inode_hashtable + hash(sb, ino);
1368ad5e195aSChristoph Hellwig struct inode *inode;
1369ad5e195aSChristoph Hellwig
13703f19b2abSDavid Howells hlist_for_each_entry_rcu(inode, b, i_hash) {
13713f19b2abSDavid Howells if (inode->i_ino == ino && inode->i_sb == sb)
1372ad5e195aSChristoph Hellwig return 0;
1373ad5e195aSChristoph Hellwig }
1374ad5e195aSChristoph Hellwig return 1;
1375ad5e195aSChristoph Hellwig }
1376ad5e195aSChristoph Hellwig
13771da177e4SLinus Torvalds /**
13781da177e4SLinus Torvalds * iunique - get a unique inode number
13791da177e4SLinus Torvalds * @sb: superblock
13801da177e4SLinus Torvalds * @max_reserved: highest reserved inode number
13811da177e4SLinus Torvalds *
13821da177e4SLinus Torvalds * Obtain an inode number that is unique on the system for a given
13831da177e4SLinus Torvalds * superblock. This is used by file systems that have no natural
13841da177e4SLinus Torvalds * permanent inode numbering system. An inode number is returned that
13851da177e4SLinus Torvalds * is higher than the reserved limit but unique.
13861da177e4SLinus Torvalds *
13871da177e4SLinus Torvalds * BUGS:
13881da177e4SLinus Torvalds * With a large number of inodes live on the file system this function
13891da177e4SLinus Torvalds * currently becomes quite slow.
13901da177e4SLinus Torvalds */
iunique(struct super_block * sb,ino_t max_reserved)13911da177e4SLinus Torvalds ino_t iunique(struct super_block *sb, ino_t max_reserved)
13921da177e4SLinus Torvalds {
1393866b04fcSJeff Layton /*
1394866b04fcSJeff Layton * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1395866b04fcSJeff Layton * error if st_ino won't fit in target struct field. Use 32bit counter
1396866b04fcSJeff Layton * here to attempt to avoid that.
1397866b04fcSJeff Layton */
1398ad5e195aSChristoph Hellwig static DEFINE_SPINLOCK(iunique_lock);
1399866b04fcSJeff Layton static unsigned int counter;
14001da177e4SLinus Torvalds ino_t res;
14013361c7beSJeffrey Layton
14023f19b2abSDavid Howells rcu_read_lock();
1403ad5e195aSChristoph Hellwig spin_lock(&iunique_lock);
14043361c7beSJeffrey Layton do {
14053361c7beSJeffrey Layton if (counter <= max_reserved)
14063361c7beSJeffrey Layton counter = max_reserved + 1;
14071da177e4SLinus Torvalds res = counter++;
1408ad5e195aSChristoph Hellwig } while (!test_inode_iunique(sb, res));
1409ad5e195aSChristoph Hellwig spin_unlock(&iunique_lock);
14103f19b2abSDavid Howells rcu_read_unlock();
14113361c7beSJeffrey Layton
14121da177e4SLinus Torvalds return res;
14131da177e4SLinus Torvalds }
14141da177e4SLinus Torvalds EXPORT_SYMBOL(iunique);
14151da177e4SLinus Torvalds
igrab(struct inode * inode)14161da177e4SLinus Torvalds struct inode *igrab(struct inode *inode)
14171da177e4SLinus Torvalds {
1418250df6edSDave Chinner spin_lock(&inode->i_lock);
1419250df6edSDave Chinner if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
14201da177e4SLinus Torvalds __iget(inode);
1421250df6edSDave Chinner spin_unlock(&inode->i_lock);
1422250df6edSDave Chinner } else {
1423250df6edSDave Chinner spin_unlock(&inode->i_lock);
14241da177e4SLinus Torvalds /*
14251da177e4SLinus Torvalds * Handle the case where s_op->clear_inode is not been
14261da177e4SLinus Torvalds * called yet, and somebody is calling igrab
14271da177e4SLinus Torvalds * while the inode is getting freed.
14281da177e4SLinus Torvalds */
14291da177e4SLinus Torvalds inode = NULL;
1430250df6edSDave Chinner }
14311da177e4SLinus Torvalds return inode;
14321da177e4SLinus Torvalds }
14331da177e4SLinus Torvalds EXPORT_SYMBOL(igrab);
14341da177e4SLinus Torvalds
14351da177e4SLinus Torvalds /**
143688bd5121SAnton Altaparmakov * ilookup5_nowait - search for an inode in the inode cache
14371da177e4SLinus Torvalds * @sb: super block of file system to search
14381da177e4SLinus Torvalds * @hashval: hash value (usually inode number) to search for
14391da177e4SLinus Torvalds * @test: callback used for comparisons between inodes
14401da177e4SLinus Torvalds * @data: opaque data pointer to pass to @test
14411da177e4SLinus Torvalds *
14420b2d0724SChristoph Hellwig * Search for the inode specified by @hashval and @data in the inode cache.
14431da177e4SLinus Torvalds * If the inode is in the cache, the inode is returned with an incremented
14440b2d0724SChristoph Hellwig * reference count.
144588bd5121SAnton Altaparmakov *
14460b2d0724SChristoph Hellwig * Note: I_NEW is not waited upon so you have to be very careful what you do
14470b2d0724SChristoph Hellwig * with the returned inode. You probably should be using ilookup5() instead.
144888bd5121SAnton Altaparmakov *
1449b6d0ad68SRandy Dunlap * Note2: @test is called with the inode_hash_lock held, so can't sleep.
145088bd5121SAnton Altaparmakov */
ilookup5_nowait(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)145188bd5121SAnton Altaparmakov struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
145288bd5121SAnton Altaparmakov int (*test)(struct inode *, void *), void *data)
145388bd5121SAnton Altaparmakov {
145488bd5121SAnton Altaparmakov struct hlist_head *head = inode_hashtable + hash(sb, hashval);
14550b2d0724SChristoph Hellwig struct inode *inode;
145688bd5121SAnton Altaparmakov
14570b2d0724SChristoph Hellwig spin_lock(&inode_hash_lock);
14580b2d0724SChristoph Hellwig inode = find_inode(sb, head, test, data);
14590b2d0724SChristoph Hellwig spin_unlock(&inode_hash_lock);
14600b2d0724SChristoph Hellwig
1461c2b6d621SAl Viro return IS_ERR(inode) ? NULL : inode;
146288bd5121SAnton Altaparmakov }
146388bd5121SAnton Altaparmakov EXPORT_SYMBOL(ilookup5_nowait);
146488bd5121SAnton Altaparmakov
/**
 * ilookup5 - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct inode *inode;

	for (;;) {
		inode = ilookup5_nowait(sb, hashval, test, data);
		if (!inode)
			break;
		wait_on_inode(inode);
		/* A concurrent eviction may have unhashed it: retry then. */
		if (likely(!inode_unhashed(inode)))
			break;
		iput(inode);
	}
	return inode;
}
EXPORT_SYMBOL(ilookup5);
14981da177e4SLinus Torvalds
14991da177e4SLinus Torvalds /**
15001da177e4SLinus Torvalds * ilookup - search for an inode in the inode cache
15011da177e4SLinus Torvalds * @sb: super block of file system to search
15021da177e4SLinus Torvalds * @ino: inode number to search for
15031da177e4SLinus Torvalds *
15040b2d0724SChristoph Hellwig * Search for the inode @ino in the inode cache, and if the inode is in the
15050b2d0724SChristoph Hellwig * cache, the inode is returned with an incremented reference count.
15061da177e4SLinus Torvalds */
ilookup(struct super_block * sb,unsigned long ino)15071da177e4SLinus Torvalds struct inode *ilookup(struct super_block *sb, unsigned long ino)
15081da177e4SLinus Torvalds {
15091da177e4SLinus Torvalds struct hlist_head *head = inode_hashtable + hash(sb, ino);
15100b2d0724SChristoph Hellwig struct inode *inode;
15112864f301SAl Viro again:
15120b2d0724SChristoph Hellwig spin_lock(&inode_hash_lock);
15130b2d0724SChristoph Hellwig inode = find_inode_fast(sb, head, ino);
15140b2d0724SChristoph Hellwig spin_unlock(&inode_hash_lock);
15150b2d0724SChristoph Hellwig
15162864f301SAl Viro if (inode) {
1517c2b6d621SAl Viro if (IS_ERR(inode))
1518c2b6d621SAl Viro return NULL;
15190b2d0724SChristoph Hellwig wait_on_inode(inode);
15202864f301SAl Viro if (unlikely(inode_unhashed(inode))) {
15212864f301SAl Viro iput(inode);
15222864f301SAl Viro goto again;
15232864f301SAl Viro }
15242864f301SAl Viro }
15250b2d0724SChristoph Hellwig return inode;
15261da177e4SLinus Torvalds }
15271da177e4SLinus Torvalds EXPORT_SYMBOL(ilookup);
15281da177e4SLinus Torvalds
1529fe032c42STheodore Ts'o /**
1530fe032c42STheodore Ts'o * find_inode_nowait - find an inode in the inode cache
1531fe032c42STheodore Ts'o * @sb: super block of file system to search
1532fe032c42STheodore Ts'o * @hashval: hash value (usually inode number) to search for
1533fe032c42STheodore Ts'o * @match: callback used for comparisons between inodes
1534fe032c42STheodore Ts'o * @data: opaque data pointer to pass to @match
1535fe032c42STheodore Ts'o *
1536fe032c42STheodore Ts'o * Search for the inode specified by @hashval and @data in the inode
1537fe032c42STheodore Ts'o * cache, where the helper function @match will return 0 if the inode
1538fe032c42STheodore Ts'o * does not match, 1 if the inode does match, and -1 if the search
1539fe032c42STheodore Ts'o * should be stopped. The @match function must be responsible for
1540fe032c42STheodore Ts'o * taking the i_lock spin_lock and checking i_state for an inode being
1541fe032c42STheodore Ts'o * freed or being initialized, and incrementing the reference count
1542fe032c42STheodore Ts'o * before returning 1. It also must not sleep, since it is called with
1543fe032c42STheodore Ts'o * the inode_hash_lock spinlock held.
1544fe032c42STheodore Ts'o *
1545fe032c42STheodore Ts'o * This is a even more generalized version of ilookup5() when the
1546fe032c42STheodore Ts'o * function must never block --- find_inode() can block in
1547fe032c42STheodore Ts'o * __wait_on_freeing_inode() --- or when the caller can not increment
1548fe032c42STheodore Ts'o * the reference count because the resulting iput() might cause an
1549fe032c42STheodore Ts'o * inode eviction. The tradeoff is that the @match funtion must be
1550fe032c42STheodore Ts'o * very carefully implemented.
1551fe032c42STheodore Ts'o */
find_inode_nowait(struct super_block * sb,unsigned long hashval,int (* match)(struct inode *,unsigned long,void *),void * data)1552fe032c42STheodore Ts'o struct inode *find_inode_nowait(struct super_block *sb,
1553fe032c42STheodore Ts'o unsigned long hashval,
1554fe032c42STheodore Ts'o int (*match)(struct inode *, unsigned long,
1555fe032c42STheodore Ts'o void *),
1556fe032c42STheodore Ts'o void *data)
1557fe032c42STheodore Ts'o {
1558fe032c42STheodore Ts'o struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1559fe032c42STheodore Ts'o struct inode *inode, *ret_inode = NULL;
1560fe032c42STheodore Ts'o int mval;
1561fe032c42STheodore Ts'o
1562fe032c42STheodore Ts'o spin_lock(&inode_hash_lock);
1563fe032c42STheodore Ts'o hlist_for_each_entry(inode, head, i_hash) {
1564fe032c42STheodore Ts'o if (inode->i_sb != sb)
1565fe032c42STheodore Ts'o continue;
1566fe032c42STheodore Ts'o mval = match(inode, hashval, data);
1567fe032c42STheodore Ts'o if (mval == 0)
1568fe032c42STheodore Ts'o continue;
1569fe032c42STheodore Ts'o if (mval == 1)
1570fe032c42STheodore Ts'o ret_inode = inode;
1571fe032c42STheodore Ts'o goto out;
1572fe032c42STheodore Ts'o }
1573fe032c42STheodore Ts'o out:
1574fe032c42STheodore Ts'o spin_unlock(&inode_hash_lock);
1575fe032c42STheodore Ts'o return ret_inode;
1576fe032c42STheodore Ts'o }
1577fe032c42STheodore Ts'o EXPORT_SYMBOL(find_inode_nowait);
1578fe032c42STheodore Ts'o
15793f19b2abSDavid Howells /**
15803f19b2abSDavid Howells * find_inode_rcu - find an inode in the inode cache
15813f19b2abSDavid Howells * @sb: Super block of file system to search
15823f19b2abSDavid Howells * @hashval: Key to hash
15833f19b2abSDavid Howells * @test: Function to test match on an inode
15843f19b2abSDavid Howells * @data: Data for test function
15853f19b2abSDavid Howells *
15863f19b2abSDavid Howells * Search for the inode specified by @hashval and @data in the inode cache,
15873f19b2abSDavid Howells * where the helper function @test will return 0 if the inode does not match
15883f19b2abSDavid Howells * and 1 if it does. The @test function must be responsible for taking the
15893f19b2abSDavid Howells * i_lock spin_lock and checking i_state for an inode being freed or being
15903f19b2abSDavid Howells * initialized.
15913f19b2abSDavid Howells *
15923f19b2abSDavid Howells * If successful, this will return the inode for which the @test function
15933f19b2abSDavid Howells * returned 1 and NULL otherwise.
15943f19b2abSDavid Howells *
15953f19b2abSDavid Howells * The @test function is not permitted to take a ref on any inode presented.
15963f19b2abSDavid Howells * It is also not permitted to sleep.
15973f19b2abSDavid Howells *
15983f19b2abSDavid Howells * The caller must hold the RCU read lock.
15993f19b2abSDavid Howells */
find_inode_rcu(struct super_block * sb,unsigned long hashval,int (* test)(struct inode *,void *),void * data)16003f19b2abSDavid Howells struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
16013f19b2abSDavid Howells int (*test)(struct inode *, void *), void *data)
16023f19b2abSDavid Howells {
16033f19b2abSDavid Howells struct hlist_head *head = inode_hashtable + hash(sb, hashval);
16043f19b2abSDavid Howells struct inode *inode;
16053f19b2abSDavid Howells
16063f19b2abSDavid Howells RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16073f19b2abSDavid Howells "suspicious find_inode_rcu() usage");
16083f19b2abSDavid Howells
16093f19b2abSDavid Howells hlist_for_each_entry_rcu(inode, head, i_hash) {
16103f19b2abSDavid Howells if (inode->i_sb == sb &&
16113f19b2abSDavid Howells !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
16123f19b2abSDavid Howells test(inode, data))
16133f19b2abSDavid Howells return inode;
16143f19b2abSDavid Howells }
16153f19b2abSDavid Howells return NULL;
16163f19b2abSDavid Howells }
16173f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_rcu);
16183f19b2abSDavid Howells
16193f19b2abSDavid Howells /**
1620961f3c89SMauro Carvalho Chehab * find_inode_by_ino_rcu - Find an inode in the inode cache
16213f19b2abSDavid Howells * @sb: Super block of file system to search
16223f19b2abSDavid Howells * @ino: The inode number to match
16233f19b2abSDavid Howells *
16243f19b2abSDavid Howells * Search for the inode specified by @hashval and @data in the inode cache,
16253f19b2abSDavid Howells * where the helper function @test will return 0 if the inode does not match
16263f19b2abSDavid Howells * and 1 if it does. The @test function must be responsible for taking the
16273f19b2abSDavid Howells * i_lock spin_lock and checking i_state for an inode being freed or being
16283f19b2abSDavid Howells * initialized.
16293f19b2abSDavid Howells *
16303f19b2abSDavid Howells * If successful, this will return the inode for which the @test function
16313f19b2abSDavid Howells * returned 1 and NULL otherwise.
16323f19b2abSDavid Howells *
16333f19b2abSDavid Howells * The @test function is not permitted to take a ref on any inode presented.
16343f19b2abSDavid Howells * It is also not permitted to sleep.
16353f19b2abSDavid Howells *
16363f19b2abSDavid Howells * The caller must hold the RCU read lock.
16373f19b2abSDavid Howells */
find_inode_by_ino_rcu(struct super_block * sb,unsigned long ino)16383f19b2abSDavid Howells struct inode *find_inode_by_ino_rcu(struct super_block *sb,
16393f19b2abSDavid Howells unsigned long ino)
16403f19b2abSDavid Howells {
16413f19b2abSDavid Howells struct hlist_head *head = inode_hashtable + hash(sb, ino);
16423f19b2abSDavid Howells struct inode *inode;
16433f19b2abSDavid Howells
16443f19b2abSDavid Howells RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
16453f19b2abSDavid Howells "suspicious find_inode_by_ino_rcu() usage");
16463f19b2abSDavid Howells
16473f19b2abSDavid Howells hlist_for_each_entry_rcu(inode, head, i_hash) {
16483f19b2abSDavid Howells if (inode->i_ino == ino &&
16493f19b2abSDavid Howells inode->i_sb == sb &&
16503f19b2abSDavid Howells !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
16513f19b2abSDavid Howells return inode;
16523f19b2abSDavid Howells }
16533f19b2abSDavid Howells return NULL;
16543f19b2abSDavid Howells }
16553f19b2abSDavid Howells EXPORT_SYMBOL(find_inode_by_ino_rcu);
16563f19b2abSDavid Howells
/*
 * insert_inode_locked - hash a freshly allocated inode for exclusive creation
 *
 * @inode must already have i_sb and i_ino set.  On success the inode is
 * hashed with I_NEW and I_CREATING set and 0 is returned.  Returns -EBUSY
 * when a live inode with the same (sb, ino) identity is already hashed:
 * either it is itself being created exclusively (I_CREATING), or it is
 * still hashed after its I_NEW wait completes.
 */
int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	while (1) {
		struct inode *old = NULL;
		spin_lock(&inode_hash_lock);
		hlist_for_each_entry(old, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			spin_lock(&old->i_lock);
			/* Inodes on their way out are not a conflict. */
			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
				spin_unlock(&old->i_lock);
				continue;
			}
			/* Conflict found; leave the loop with old->i_lock held. */
			break;
		}
		if (likely(!old)) {
			/* No conflict: hash ourselves, marked new and creating. */
			spin_lock(&inode->i_lock);
			inode->i_state |= I_NEW | I_CREATING;
			hlist_add_head_rcu(&inode->i_hash, head);
			spin_unlock(&inode->i_lock);
			spin_unlock(&inode_hash_lock);
			return 0;
		}
		if (unlikely(old->i_state & I_CREATING)) {
			/* Somebody else is creating the same inode exclusively. */
			spin_unlock(&old->i_lock);
			spin_unlock(&inode_hash_lock);
			return -EBUSY;
		}
		/* Pin the conflicting inode and wait for it to leave I_NEW. */
		__iget(old);
		spin_unlock(&old->i_lock);
		spin_unlock(&inode_hash_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			/* Still hashed after I_NEW cleared: identity is taken. */
			iput(old);
			return -EBUSY;
		}
		/* The conflicting inode went away: retry the insertion. */
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);
1703261bca86SAl Viro
insert_inode_locked4(struct inode * inode,unsigned long hashval,int (* test)(struct inode *,void *),void * data)1704261bca86SAl Viro int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1705261bca86SAl Viro int (*test)(struct inode *, void *), void *data)
1706261bca86SAl Viro {
1707c2b6d621SAl Viro struct inode *old;
1708c2b6d621SAl Viro
1709c2b6d621SAl Viro inode->i_state |= I_CREATING;
1710c2b6d621SAl Viro old = inode_insert5(inode, hashval, test, NULL, data);
1711261bca86SAl Viro
171280ea09a0SMiklos Szeredi if (old != inode) {
1713261bca86SAl Viro iput(old);
1714261bca86SAl Viro return -EBUSY;
1715261bca86SAl Viro }
171680ea09a0SMiklos Szeredi return 0;
1717261bca86SAl Viro }
1718261bca86SAl Viro EXPORT_SYMBOL(insert_inode_locked4);
1719261bca86SAl Viro
17201da177e4SLinus Torvalds
/*
 * generic_delete_inode - always evict on the final iput
 *
 * A ->drop_inode callback for filesystems that never want unreferenced
 * inodes cached: returning 1 tells iput_final() to evict the inode
 * immediately instead of keeping it on the LRU.
 */
int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
17261da177e4SLinus Torvalds
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour. If it tells
 * us to evict inode, do so. Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 *
 * Called with inode->i_lock held; the lock is dropped on every path.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	unsigned long state;
	int drop;

	WARN_ON(inode->i_state & I_NEW);

	if (op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	/* Retain the inode on the LRU if the fs is alive and caching is ok. */
	if (!drop &&
	    !(inode->i_state & I_DONTCACHE) &&
	    (sb->s_flags & SB_ACTIVE)) {
		__inode_add_lru(inode, true);
		spin_unlock(&inode->i_lock);
		return;
	}

	state = inode->i_state;
	if (!drop) {
		/*
		 * Superblock is no longer active: write the inode out before
		 * evicting it.  I_WILL_FREE is set while i_lock is dropped
		 * for the duration of write_inode_now().
		 */
		WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
		spin_unlock(&inode->i_lock);

		write_inode_now(inode, 1);

		spin_lock(&inode->i_lock);
		state = inode->i_state;
		WARN_ON(state & I_NEW);
		state &= ~I_WILL_FREE;
	}

	/* Point of no return: mark freeing, leave the LRU, and evict. */
	WRITE_ONCE(inode->i_state, state | I_FREEING);
	if (!list_empty(&inode->i_lru))
		inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}
17791da177e4SLinus Torvalds
/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count. If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (!inode)
		return;
	BUG_ON(inode->i_state & I_CLEAR);
retry:
	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
			/*
			 * Last reference to a live inode with lazytime
			 * timestamps pending: regain a reference, flush
			 * the dirty timestamps, then retry the final put.
			 */
			atomic_inc(&inode->i_count);
			spin_unlock(&inode->i_lock);
			trace_writeback_lazytime_iput(inode);
			mark_inode_dirty_sync(inode);
			goto retry;
		}
		iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);
18071da177e4SLinus Torvalds
#ifdef CONFIG_BLOCK
/**
 * bmap - find a block number in a file
 * @inode: inode owning the block number being requested
 * @block: pointer containing the block to find
 *
 * Replaces the value in ``*block`` with the block number on the device
 * holding the corresponding block of the file. That is, asked for block 4
 * of inode 1 the function will replace the 4 in ``*block`` with the disk
 * block, relative to the disk start, that holds that block of the file.
 *
 * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
 * hole, returns 0 and ``*block`` is also set to 0.
 */
int bmap(struct inode *inode, sector_t *block)
{
	struct address_space *mapping = inode->i_mapping;

	if (!mapping->a_ops->bmap)
		return -EINVAL;

	*block = mapping->a_ops->bmap(mapping, *block);
	return 0;
}
EXPORT_SYMBOL(bmap);
#endif
18331da177e4SLinus Torvalds
183411ff6f05SMatthew Garrett /*
183511ff6f05SMatthew Garrett * With relative atime, only update atime if the previous atime is
1836d98ffa1aSStephen Kitt * earlier than or equal to either the ctime or mtime,
1837d98ffa1aSStephen Kitt * or if at least a day has passed since the last atime update.
183811ff6f05SMatthew Garrett */
relatime_need_update(struct vfsmount * mnt,struct inode * inode,struct timespec64 now)1839c6718543SMiklos Szeredi static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
18406f22b664SArnd Bergmann struct timespec64 now)
184111ff6f05SMatthew Garrett {
18422276e5baSJeff Layton struct timespec64 ctime;
184311ff6f05SMatthew Garrett
1844c6718543SMiklos Szeredi if (!(mnt->mnt_flags & MNT_RELATIME))
184511ff6f05SMatthew Garrett return 1;
184611ff6f05SMatthew Garrett /*
1847d98ffa1aSStephen Kitt * Is mtime younger than or equal to atime? If yes, update atime:
184811ff6f05SMatthew Garrett */
184995582b00SDeepa Dinamani if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0)
185011ff6f05SMatthew Garrett return 1;
185111ff6f05SMatthew Garrett /*
1852d98ffa1aSStephen Kitt * Is ctime younger than or equal to atime? If yes, update atime:
185311ff6f05SMatthew Garrett */
18542276e5baSJeff Layton ctime = inode_get_ctime(inode);
18552276e5baSJeff Layton if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
185611ff6f05SMatthew Garrett return 1;
185711ff6f05SMatthew Garrett
185811ff6f05SMatthew Garrett /*
185911ff6f05SMatthew Garrett * Is the previous atime value older than a day? If yes,
186011ff6f05SMatthew Garrett * update atime:
186111ff6f05SMatthew Garrett */
186211ff6f05SMatthew Garrett if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
186311ff6f05SMatthew Garrett return 1;
186411ff6f05SMatthew Garrett /*
186511ff6f05SMatthew Garrett * Good, we can skip the atime update:
186611ff6f05SMatthew Garrett */
186711ff6f05SMatthew Garrett return 0;
186811ff6f05SMatthew Garrett }
186911ff6f05SMatthew Garrett
/**
 * inode_update_timestamps - update the timestamps on the inode
 * @inode: inode to be updated
 * @flags: S_* flags that needed to be updated
 *
 * The update_time function is called when an inode's timestamps need to be
 * updated for a read or write operation. This function handles updating the
 * actual timestamps. It's up to the caller to ensure that the inode is marked
 * dirty appropriately.
 *
 * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
 * attempt to update all three of them. S_ATIME updates can be handled
 * independently of the rest.
 *
 * Returns a set of S_* flags indicating which values changed.
 */
int inode_update_timestamps(struct inode *inode, int flags)
{
	int updated = 0;
	struct timespec64 now;

	if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
		struct timespec64 ctime = inode_get_ctime(inode);

		/* Setting ctime also yields the "now" used for mtime/atime. */
		now = inode_set_ctime_current(inode);
		if (!timespec64_equal(&now, &ctime))
			updated |= S_CTIME;
		if (!timespec64_equal(&now, &inode->i_mtime)) {
			inode->i_mtime = now;
			updated |= S_MTIME;
		}
		/* Only bump i_version when something actually changed. */
		if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
			updated |= S_VERSION;
	} else {
		now = current_time(inode);
	}

	if (flags & S_ATIME) {
		if (!timespec64_equal(&now, &inode->i_atime)) {
			inode->i_atime = now;
			updated |= S_ATIME;
		}
	}
	return updated;
}
EXPORT_SYMBOL(inode_update_timestamps);
1916e20b14dbSEric Biggers
1917541d4c79SJeff Layton /**
1918541d4c79SJeff Layton * generic_update_time - update the timestamps on the inode
1919541d4c79SJeff Layton * @inode: inode to be updated
1920541d4c79SJeff Layton * @flags: S_* flags that needed to be updated
1921541d4c79SJeff Layton *
1922541d4c79SJeff Layton * The update_time function is called when an inode's timestamps need to be
1923541d4c79SJeff Layton * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
1924541d4c79SJeff Layton * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
1925541d4c79SJeff Layton * updates can be handled done independently of the rest.
1926541d4c79SJeff Layton *
1927541d4c79SJeff Layton * Returns a S_* mask indicating which fields were updated.
1928541d4c79SJeff Layton */
generic_update_time(struct inode * inode,int flags)1929541d4c79SJeff Layton int generic_update_time(struct inode *inode, int flags)
1930541d4c79SJeff Layton {
1931541d4c79SJeff Layton int updated = inode_update_timestamps(inode, flags);
1932541d4c79SJeff Layton int dirty_flags = 0;
1933541d4c79SJeff Layton
1934541d4c79SJeff Layton if (updated & (S_ATIME|S_MTIME|S_CTIME))
1935541d4c79SJeff Layton dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
1936541d4c79SJeff Layton if (updated & S_VERSION)
1937541d4c79SJeff Layton dirty_flags |= I_DIRTY_SYNC;
1938e20b14dbSEric Biggers __mark_inode_dirty(inode, dirty_flags);
1939541d4c79SJeff Layton return updated;
1940c3b2da31SJosef Bacik }
19410ae45f63STheodore Ts'o EXPORT_SYMBOL(generic_update_time);
19420ae45f63STheodore Ts'o
/*
 * This does the actual work of updating an inodes time or version. Must have
 * had called mnt_want_write() before calling this.
 */
int inode_update_time(struct inode *inode, int flags)
{
	/* Let the filesystem override the timestamp update if it wants to. */
	if (inode->i_op->update_time)
		return inode->i_op->update_time(inode, flags);
	generic_update_time(inode, flags);
	return 0;
}
EXPORT_SYMBOL(inode_update_time);
1955c3b2da31SJosef Bacik
19561da177e4SLinus Torvalds /**
1957961f3c89SMauro Carvalho Chehab * atime_needs_update - update the access time
1958185553b2SRandy Dunlap * @path: the &struct path to update
195930fdc8eeSRandy Dunlap * @inode: inode to update
19601da177e4SLinus Torvalds *
19611da177e4SLinus Torvalds * Update the accessed time on an inode and mark it for writeback.
19621da177e4SLinus Torvalds * This function automatically handles read only file systems and media,
19631da177e4SLinus Torvalds * as well as the "noatime" flag and inode specific "noatime" markers.
19641da177e4SLinus Torvalds */
atime_needs_update(const struct path * path,struct inode * inode)1965c6718543SMiklos Szeredi bool atime_needs_update(const struct path *path, struct inode *inode)
19668fa9dd24SNeilBrown {
19678fa9dd24SNeilBrown struct vfsmount *mnt = path->mnt;
196895582b00SDeepa Dinamani struct timespec64 now;
19698fa9dd24SNeilBrown
19708fa9dd24SNeilBrown if (inode->i_flags & S_NOATIME)
19718fa9dd24SNeilBrown return false;
19720bd23d09SEric W. Biederman
19730bd23d09SEric W. Biederman /* Atime updates will likely cause i_uid and i_gid to be written
19740bd23d09SEric W. Biederman * back improprely if their true value is unknown to the vfs.
19750bd23d09SEric W. Biederman */
19764609e1f1SChristian Brauner if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode))
19770bd23d09SEric W. Biederman return false;
19780bd23d09SEric W. Biederman
19798fa9dd24SNeilBrown if (IS_NOATIME(inode))
19808fa9dd24SNeilBrown return false;
19811751e8a6SLinus Torvalds if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
19828fa9dd24SNeilBrown return false;
19838fa9dd24SNeilBrown
19848fa9dd24SNeilBrown if (mnt->mnt_flags & MNT_NOATIME)
19858fa9dd24SNeilBrown return false;
19868fa9dd24SNeilBrown if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
19878fa9dd24SNeilBrown return false;
19888fa9dd24SNeilBrown
1989c2050a45SDeepa Dinamani now = current_time(inode);
19908fa9dd24SNeilBrown
19916f22b664SArnd Bergmann if (!relatime_need_update(mnt, inode, now))
19928fa9dd24SNeilBrown return false;
19938fa9dd24SNeilBrown
199495582b00SDeepa Dinamani if (timespec64_equal(&inode->i_atime, &now))
19958fa9dd24SNeilBrown return false;
19968fa9dd24SNeilBrown
19978fa9dd24SNeilBrown return true;
19988fa9dd24SNeilBrown }
19998fa9dd24SNeilBrown
touch_atime(const struct path * path)2000badcf2b7SAl Viro void touch_atime(const struct path *path)
20011da177e4SLinus Torvalds {
200268ac1234SAl Viro struct vfsmount *mnt = path->mnt;
2003df2b1afdSDavid Howells struct inode *inode = d_inode(path->dentry);
20041da177e4SLinus Torvalds
2005c6718543SMiklos Szeredi if (!atime_needs_update(path, inode))
2006b12536c2SAndi Kleen return;
2007b12536c2SAndi Kleen
20085d37e9e6SJan Kara if (!sb_start_write_trylock(inode->i_sb))
2009b12536c2SAndi Kleen return;
201047ae32d6SValerie Henson
20118fa9dd24SNeilBrown if (__mnt_want_write(mnt) != 0)
20125d37e9e6SJan Kara goto skip_update;
2013c3b2da31SJosef Bacik /*
2014c3b2da31SJosef Bacik * File systems can error out when updating inodes if they need to
2015c3b2da31SJosef Bacik * allocate new space to modify an inode (such is the case for
2016c3b2da31SJosef Bacik * Btrfs), but since we touch atime while walking down the path we
2017c3b2da31SJosef Bacik * really don't care if we failed to update the atime of the file,
2018c3b2da31SJosef Bacik * so just ignore the return value.
20192bc55652SAlexander Block * We may also fail on filesystems that have the ability to make parts
20202bc55652SAlexander Block * of the fs read only, e.g. subvolumes in Btrfs.
2021c3b2da31SJosef Bacik */
2022913e9928SJeff Layton inode_update_time(inode, S_ATIME);
20235d37e9e6SJan Kara __mnt_drop_write(mnt);
20245d37e9e6SJan Kara skip_update:
20255d37e9e6SJan Kara sb_end_write(inode->i_sb);
20261da177e4SLinus Torvalds }
2027869243a0SChristoph Hellwig EXPORT_SYMBOL(touch_atime);
20281da177e4SLinus Torvalds
/*
 * Return mask of changes for notify_change() that need to be done as a
 * response to write or truncate. Return 0 if nothing has to be changed.
 * Negative value on error (change should be denied).
 */
int dentry_needs_remove_privs(struct mnt_idmap *idmap,
			      struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	int mask = 0;
	int ret;

	/* IS_NOSEC inodes have already been verified to carry no privileges. */
	if (IS_NOSEC(inode))
		return 0;

	mask = setattr_should_drop_suidgid(idmap, inode);
	ret = security_inode_need_killpriv(dentry);
	if (ret < 0)
		return ret;
	if (ret)
		mask |= ATTR_KILL_PRIV;
	return mask;
}
2052dbfae0cdSJan Kara
__remove_privs(struct mnt_idmap * idmap,struct dentry * dentry,int kill)2053abf08576SChristian Brauner static int __remove_privs(struct mnt_idmap *idmap,
2054643fe55aSChristian Brauner struct dentry *dentry, int kill)
20553ed37648SCong Wang {
20563ed37648SCong Wang struct iattr newattrs;
20573ed37648SCong Wang
20583ed37648SCong Wang newattrs.ia_valid = ATTR_FORCE | kill;
205927ac0ffeSJ. Bruce Fields /*
206027ac0ffeSJ. Bruce Fields * Note we call this on write, so notify_change will not
206127ac0ffeSJ. Bruce Fields * encounter any conflicting delegations:
206227ac0ffeSJ. Bruce Fields */
2063abf08576SChristian Brauner return notify_change(idmap, dentry, &newattrs, NULL);
20643ed37648SCong Wang }
20653ed37648SCong Wang
__file_remove_privs(struct file * file,unsigned int flags)2066faf99b56SStefan Roesch static int __file_remove_privs(struct file *file, unsigned int flags)
20673ed37648SCong Wang {
2068c1892c37SMiklos Szeredi struct dentry *dentry = file_dentry(file);
2069c1892c37SMiklos Szeredi struct inode *inode = file_inode(file);
207041191cf6SStefan Roesch int error = 0;
2071dbfae0cdSJan Kara int kill;
20723ed37648SCong Wang
2073f69e749aSAlexander Lochmann if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
20743ed37648SCong Wang return 0;
20753ed37648SCong Wang
20769452e93eSChristian Brauner kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry);
207741191cf6SStefan Roesch if (kill < 0)
2078dbfae0cdSJan Kara return kill;
2079faf99b56SStefan Roesch
208041191cf6SStefan Roesch if (kill) {
2081faf99b56SStefan Roesch if (flags & IOCB_NOWAIT)
2082faf99b56SStefan Roesch return -EAGAIN;
2083faf99b56SStefan Roesch
2084abf08576SChristian Brauner error = __remove_privs(file_mnt_idmap(file), dentry, kill);
208541191cf6SStefan Roesch }
208641191cf6SStefan Roesch
20872426f391SJan Kara if (!error)
20882426f391SJan Kara inode_has_no_xattr(inode);
20893ed37648SCong Wang return error;
20903ed37648SCong Wang }
2091faf99b56SStefan Roesch
/**
 * file_remove_privs - remove special file privileges (suid, capabilities)
 * @file: file to remove privileges from
 *
 * When file is modified by a write or truncation ensure that special
 * file privileges are removed.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_remove_privs(struct file *file)
{
	/* Blocking variant: no IOCB_NOWAIT semantics. */
	return __file_remove_privs(file, 0);
}
EXPORT_SYMBOL(file_remove_privs);
21063ed37648SCong Wang
inode_needs_update_time(struct inode * inode)2107913e9928SJeff Layton static int inode_needs_update_time(struct inode *inode)
21081da177e4SLinus Torvalds {
2109c3b2da31SJosef Bacik int sync_it = 0;
2110647aa768SChristian Brauner struct timespec64 now = current_time(inode);
21112276e5baSJeff Layton struct timespec64 ctime;
21121da177e4SLinus Torvalds
2113ce06e0b2SAndi Kleen /* First try to exhaust all avenues to not sync */
21141da177e4SLinus Torvalds if (IS_NOCMTIME(inode))
2115c3b2da31SJosef Bacik return 0;
211620ddee2cSDave Hansen
2117913e9928SJeff Layton if (!timespec64_equal(&inode->i_mtime, &now))
2118ce06e0b2SAndi Kleen sync_it = S_MTIME;
2119ce06e0b2SAndi Kleen
21202276e5baSJeff Layton ctime = inode_get_ctime(inode);
2121913e9928SJeff Layton if (!timespec64_equal(&ctime, &now))
2122ce06e0b2SAndi Kleen sync_it |= S_CTIME;
2123ce06e0b2SAndi Kleen
2124e38cf302SJeff Layton if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
2125ce06e0b2SAndi Kleen sync_it |= S_VERSION;
2126ce06e0b2SAndi Kleen
21276a2aa5d8SStefan Roesch return sync_it;
21286a2aa5d8SStefan Roesch }
2129ed97bd37SAndreas Mohr
__file_update_time(struct file * file,int sync_mode)2130913e9928SJeff Layton static int __file_update_time(struct file *file, int sync_mode)
21316a2aa5d8SStefan Roesch {
21326a2aa5d8SStefan Roesch int ret = 0;
21336a2aa5d8SStefan Roesch struct inode *inode = file_inode(file);
21346a2aa5d8SStefan Roesch
21356a2aa5d8SStefan Roesch /* try to update time settings */
21366a2aa5d8SStefan Roesch if (!__mnt_want_write_file(file)) {
2137913e9928SJeff Layton ret = inode_update_time(inode, sync_mode);
2138eb04c282SJan Kara __mnt_drop_write_file(file);
21396a2aa5d8SStefan Roesch }
2140c3b2da31SJosef Bacik
2141c3b2da31SJosef Bacik return ret;
21421da177e4SLinus Torvalds }
21436a2aa5d8SStefan Roesch
/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode for
 * writeback. Note that this function is meant exclusively for usage in
 * the file write path of filesystems, and filesystems may choose to
 * explicitly ignore updates via this function with the _NOCMTIME inode
 * flag, e.g. for network filesystem where these timestamps are handled
 * by the server. This can return an error for file systems who need to
 * allocate space in order to update an inode.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_update_time(struct file *file)
{
	int ret;
	struct inode *inode = file_inode(file);

	/* ret is a mask of S_* flags that need updating, or <= 0. */
	ret = inode_needs_update_time(inode);
	if (ret <= 0)
		return ret;

	return __file_update_time(file, ret);
}
EXPORT_SYMBOL(file_update_time);
21701da177e4SLinus Torvalds
2171faf99b56SStefan Roesch /**
217266fa3cedSStefan Roesch * file_modified_flags - handle mandated vfs changes when modifying a file
217366fa3cedSStefan Roesch * @file: file that was modified
217466fa3cedSStefan Roesch * @flags: kiocb flags
217566fa3cedSStefan Roesch *
217666fa3cedSStefan Roesch * When file has been modified ensure that special
217766fa3cedSStefan Roesch * file privileges are removed and time settings are updated.
217866fa3cedSStefan Roesch *
217966fa3cedSStefan Roesch * If IOCB_NOWAIT is set, special file privileges will not be removed and
218066fa3cedSStefan Roesch * time settings will not be updated. It will return -EAGAIN.
218166fa3cedSStefan Roesch *
218266fa3cedSStefan Roesch * Context: Caller must hold the file's inode lock.
218366fa3cedSStefan Roesch *
218466fa3cedSStefan Roesch * Return: 0 on success, negative errno on failure.
218566fa3cedSStefan Roesch */
file_modified_flags(struct file * file,int flags)218666fa3cedSStefan Roesch static int file_modified_flags(struct file *file, int flags)
218766fa3cedSStefan Roesch {
218866fa3cedSStefan Roesch int ret;
218966fa3cedSStefan Roesch struct inode *inode = file_inode(file);
219066fa3cedSStefan Roesch
219166fa3cedSStefan Roesch /*
219266fa3cedSStefan Roesch * Clear the security bits if the process is not being run by root.
219366fa3cedSStefan Roesch * This keeps people from modifying setuid and setgid binaries.
219466fa3cedSStefan Roesch */
219566fa3cedSStefan Roesch ret = __file_remove_privs(file, flags);
219666fa3cedSStefan Roesch if (ret)
219766fa3cedSStefan Roesch return ret;
219866fa3cedSStefan Roesch
219966fa3cedSStefan Roesch if (unlikely(file->f_mode & FMODE_NOCMTIME))
220066fa3cedSStefan Roesch return 0;
220166fa3cedSStefan Roesch
2202913e9928SJeff Layton ret = inode_needs_update_time(inode);
220366fa3cedSStefan Roesch if (ret <= 0)
220466fa3cedSStefan Roesch return ret;
220566fa3cedSStefan Roesch if (flags & IOCB_NOWAIT)
220666fa3cedSStefan Roesch return -EAGAIN;
220766fa3cedSStefan Roesch
2208913e9928SJeff Layton return __file_update_time(file, ret);
220966fa3cedSStefan Roesch }
221066fa3cedSStefan Roesch
/**
 * file_modified - handle mandated vfs changes when modifying a file
 * @file: file that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int file_modified(struct file *file)
{
	/* Blocking variant: no kiocb flags. */
	return file_modified_flags(file, 0);
}
EXPORT_SYMBOL(file_modified);
2227e38f7f53SAmir Goldstein
/**
 * kiocb_modified - handle mandated vfs changes when modifying a file
 * @iocb: iocb that was modified
 *
 * When file has been modified ensure that special
 * file privileges are removed and time settings are updated.
 *
 * Context: Caller must hold the file's inode lock.
 *
 * Return: 0 on success, negative errno on failure.
 */
int kiocb_modified(struct kiocb *iocb)
{
	/* Honours IOCB_NOWAIT from the iocb's flags. */
	return file_modified_flags(iocb->ki_filp, iocb->ki_flags);
}
EXPORT_SYMBOL_GPL(kiocb_modified);
224466fa3cedSStefan Roesch
inode_needs_sync(struct inode * inode)22451da177e4SLinus Torvalds int inode_needs_sync(struct inode *inode)
22461da177e4SLinus Torvalds {
22471da177e4SLinus Torvalds if (IS_SYNC(inode))
22481da177e4SLinus Torvalds return 1;
22491da177e4SLinus Torvalds if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
22501da177e4SLinus Torvalds return 1;
22511da177e4SLinus Torvalds return 0;
22521da177e4SLinus Torvalds }
22531da177e4SLinus Torvalds EXPORT_SYMBOL(inode_needs_sync);
22541da177e4SLinus Torvalds
/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found. This function waits
 * until the deletion _might_ have completed. Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	/*
	 * Drop both locks before sleeping; reacquire the hash lock on the
	 * way out so the caller can rescan the chain. i_lock is NOT
	 * retaken here -- the inode may be gone by the time we wake.
	 */
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
	spin_lock(&inode_hash_lock);
}
22781da177e4SLinus Torvalds
static __initdata unsigned long ihash_entries;
/* Parse the "ihash_entries=" boot parameter: requested inode hash size. */
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);
22881da177e4SLinus Torvalds
/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY | HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}
23111da177e4SLinus Torvalds
inode_init(void)231274bf17cfSDenis Cheng void __init inode_init(void)
23131da177e4SLinus Torvalds {
23141da177e4SLinus Torvalds /* inode slab cache */
2315b0196009SPaul Jackson inode_cachep = kmem_cache_create("inode_cache",
2316b0196009SPaul Jackson sizeof(struct inode),
2317b0196009SPaul Jackson 0,
2318b0196009SPaul Jackson (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
23195d097056SVladimir Davydov SLAB_MEM_SPREAD|SLAB_ACCOUNT),
232020c2df83SPaul Mundt init_once);
23211da177e4SLinus Torvalds
23221da177e4SLinus Torvalds /* Hash may have been set up in inode_init_early */
23231da177e4SLinus Torvalds if (!hashdist)
23241da177e4SLinus Torvalds return;
23251da177e4SLinus Torvalds
23261da177e4SLinus Torvalds inode_hashtable =
23271da177e4SLinus Torvalds alloc_large_system_hash("Inode-cache",
23281da177e4SLinus Torvalds sizeof(struct hlist_head),
23291da177e4SLinus Torvalds ihash_entries,
23301da177e4SLinus Torvalds 14,
23313d375d78SPavel Tatashin HASH_ZERO,
23321da177e4SLinus Torvalds &i_hash_shift,
23331da177e4SLinus Torvalds &i_hash_mask,
233431fe62b9STim Bird 0,
23351da177e4SLinus Torvalds 0);
23361da177e4SLinus Torvalds }
23371da177e4SLinus Torvalds
init_special_inode(struct inode * inode,umode_t mode,dev_t rdev)23381da177e4SLinus Torvalds void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
23391da177e4SLinus Torvalds {
23401da177e4SLinus Torvalds inode->i_mode = mode;
23411da177e4SLinus Torvalds if (S_ISCHR(mode)) {
23421da177e4SLinus Torvalds inode->i_fop = &def_chr_fops;
23431da177e4SLinus Torvalds inode->i_rdev = rdev;
23441da177e4SLinus Torvalds } else if (S_ISBLK(mode)) {
2345bda2795aSChristoph Hellwig if (IS_ENABLED(CONFIG_BLOCK))
23461da177e4SLinus Torvalds inode->i_fop = &def_blk_fops;
23471da177e4SLinus Torvalds inode->i_rdev = rdev;
23481da177e4SLinus Torvalds } else if (S_ISFIFO(mode))
2349599a0ac1SAl Viro inode->i_fop = &pipefifo_fops;
23501da177e4SLinus Torvalds else if (S_ISSOCK(mode))
2351bd9b51e7SAl Viro ; /* leave it no_open_fops */
23521da177e4SLinus Torvalds else
2353af0d9ae8SManish Katiyar printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
2354af0d9ae8SManish Katiyar " inode %s:%lu\n", mode, inode->i_sb->s_id,
2355af0d9ae8SManish Katiyar inode->i_ino);
23561da177e4SLinus Torvalds }
23571da177e4SLinus Torvalds EXPORT_SYMBOL(init_special_inode);
2358a1bd120dSDmitry Monakhov
/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @idmap: idmap of the mount the inode was created from
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 *
 * If the inode has been created through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions
 * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
 * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode)
{
	inode_fsuid_set(inode, idmap);
	if (dir && dir->i_mode & S_ISGID) {
		/* Setgid directories propagate their group to new inodes. */
		inode->i_gid = dir->i_gid;

		/* Directories are special, and always inherit S_ISGID */
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode_fsgid_set(inode, idmap);
	inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
2387e795b717SSerge E. Hallyn
23882e149670SSerge E. Hallyn /**
23892e149670SSerge E. Hallyn * inode_owner_or_capable - check current task permissions to inode
239001beba79SChristian Brauner * @idmap: idmap of the mount the inode was found from
23912e149670SSerge E. Hallyn * @inode: inode being checked
23922e149670SSerge E. Hallyn *
239323adbe12SAndy Lutomirski * Return true if current either has CAP_FOWNER in a namespace with the
239423adbe12SAndy Lutomirski * inode owner uid mapped, or owns the file.
239521cb47beSChristian Brauner *
239601beba79SChristian Brauner * If the inode has been found through an idmapped mount the idmap of
239701beba79SChristian Brauner * the vfsmount must be passed through @idmap. This function will then take
239801beba79SChristian Brauner * care to map the inode according to @idmap before checking permissions.
239921cb47beSChristian Brauner * On non-idmapped mounts or if permission checking is to be performed on the
240001beba79SChristian Brauner * raw inode simply passs @nop_mnt_idmap.
2401e795b717SSerge E. Hallyn */
inode_owner_or_capable(struct mnt_idmap * idmap,const struct inode * inode)240201beba79SChristian Brauner bool inode_owner_or_capable(struct mnt_idmap *idmap,
240321cb47beSChristian Brauner const struct inode *inode)
2404e795b717SSerge E. Hallyn {
2405a2bd096fSChristian Brauner vfsuid_t vfsuid;
240623adbe12SAndy Lutomirski struct user_namespace *ns;
240723adbe12SAndy Lutomirski
2408e67fe633SChristian Brauner vfsuid = i_uid_into_vfsuid(idmap, inode);
2409a2bd096fSChristian Brauner if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
2410e795b717SSerge E. Hallyn return true;
241123adbe12SAndy Lutomirski
241223adbe12SAndy Lutomirski ns = current_user_ns();
2413a2bd096fSChristian Brauner if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
2414e795b717SSerge E. Hallyn return true;
2415e795b717SSerge E. Hallyn return false;
2416e795b717SSerge E. Hallyn }
24172e149670SSerge E. Hallyn EXPORT_SYMBOL(inode_owner_or_capable);
24181d59d61fSTrond Myklebust
/*
 * Direct i/o helper functions
 */

/*
 * Sleep until all in-flight direct I/O on @inode has drained, i.e. until
 * i_dio_count reaches zero.  Woken via the __I_DIO_WAKEUP bit waitqueue.
 *
 * The prepare_to_wait()/re-check/schedule() ordering is what prevents a
 * lost wakeup: we are on the waitqueue before re-reading i_dio_count, so
 * a concurrent decrement-and-wake cannot slip between the check and the
 * sleep.  Do not reorder these statements.
 */
static void __inode_dio_wait(struct inode *inode)
{
	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);

	do {
		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&inode->i_dio_count))
			schedule();
	} while (atomic_read(&inode->i_dio_count));
	finish_wait(wq, &q.wq_entry);
}
24341d59d61fSTrond Myklebust
24351d59d61fSTrond Myklebust /**
24361d59d61fSTrond Myklebust * inode_dio_wait - wait for outstanding DIO requests to finish
24371d59d61fSTrond Myklebust * @inode: inode to wait for
24381d59d61fSTrond Myklebust *
24391d59d61fSTrond Myklebust * Waits for all pending direct I/O requests to finish so that we can
24401d59d61fSTrond Myklebust * proceed with a truncate or equivalent operation.
24411d59d61fSTrond Myklebust *
24421d59d61fSTrond Myklebust * Must be called under a lock that serializes taking new references
24431d59d61fSTrond Myklebust * to i_dio_count, usually by inode->i_mutex.
24441d59d61fSTrond Myklebust */
inode_dio_wait(struct inode * inode)24451d59d61fSTrond Myklebust void inode_dio_wait(struct inode *inode)
24461d59d61fSTrond Myklebust {
24471d59d61fSTrond Myklebust if (atomic_read(&inode->i_dio_count))
24481d59d61fSTrond Myklebust __inode_dio_wait(inode);
24491d59d61fSTrond Myklebust }
24501d59d61fSTrond Myklebust EXPORT_SYMBOL(inode_dio_wait);
24511d59d61fSTrond Myklebust
24521d59d61fSTrond Myklebust /*
24535f16f322STheodore Ts'o * inode_set_flags - atomically set some inode flags
24545f16f322STheodore Ts'o *
24555f16f322STheodore Ts'o * Note: the caller should be holding i_mutex, or else be sure that
24565f16f322STheodore Ts'o * they have exclusive access to the inode structure (i.e., while the
24575f16f322STheodore Ts'o * inode is being instantiated). The reason for the cmpxchg() loop
24585f16f322STheodore Ts'o * --- which wouldn't be necessary if all code paths which modify
24595f16f322STheodore Ts'o * i_flags actually followed this rule, is that there is at least one
24605fa8e0a1SJan Kara * code path which doesn't today so we use cmpxchg() out of an abundance
24615fa8e0a1SJan Kara * of caution.
24625f16f322STheodore Ts'o *
24635f16f322STheodore Ts'o * In the long run, i_mutex is overkill, and we should probably look
24645f16f322STheodore Ts'o * at using the i_lock spinlock to protect i_flags, and then make sure
24655f16f322STheodore Ts'o * it is so documented in include/linux/fs.h and that all code follows
24665f16f322STheodore Ts'o * the locking convention!!
24675f16f322STheodore Ts'o */
inode_set_flags(struct inode * inode,unsigned int flags,unsigned int mask)24685f16f322STheodore Ts'o void inode_set_flags(struct inode *inode, unsigned int flags,
24695f16f322STheodore Ts'o unsigned int mask)
24705f16f322STheodore Ts'o {
24715f16f322STheodore Ts'o WARN_ON_ONCE(flags & ~mask);
2472a905737fSVineet Gupta set_mask_bits(&inode->i_flags, mask, flags);
24735f16f322STheodore Ts'o }
24745f16f322STheodore Ts'o EXPORT_SYMBOL(inode_set_flags);
247521fc61c7SAl Viro
/**
 * inode_nohighmem - restrict an inode's page cache allocations
 * @inode: inode whose mapping is restricted
 *
 * Set the inode's mapping gfp mask to GFP_USER; per the function name this
 * keeps page cache pages out of highmem (GFP_USER lacks __GFP_HIGHMEM), for
 * filesystems that need to access the pages through the kernel mapping.
 */
void inode_nohighmem(struct inode *inode)
{
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);
24813cd88666SDeepa Dinamani
24823cd88666SDeepa Dinamani /**
248350e17c00SDeepa Dinamani * timestamp_truncate - Truncate timespec to a granularity
248450e17c00SDeepa Dinamani * @t: Timespec
248550e17c00SDeepa Dinamani * @inode: inode being updated
248650e17c00SDeepa Dinamani *
248750e17c00SDeepa Dinamani * Truncate a timespec to the granularity supported by the fs
248850e17c00SDeepa Dinamani * containing the inode. Always rounds down. gran must
248950e17c00SDeepa Dinamani * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
249050e17c00SDeepa Dinamani */
timestamp_truncate(struct timespec64 t,struct inode * inode)249150e17c00SDeepa Dinamani struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
249250e17c00SDeepa Dinamani {
249350e17c00SDeepa Dinamani struct super_block *sb = inode->i_sb;
249450e17c00SDeepa Dinamani unsigned int gran = sb->s_time_gran;
249550e17c00SDeepa Dinamani
249650e17c00SDeepa Dinamani t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
249750e17c00SDeepa Dinamani if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
249850e17c00SDeepa Dinamani t.tv_nsec = 0;
249950e17c00SDeepa Dinamani
250050e17c00SDeepa Dinamani /* Avoid division in the common cases 1 ns and 1 s. */
250150e17c00SDeepa Dinamani if (gran == 1)
250250e17c00SDeepa Dinamani ; /* nothing */
250350e17c00SDeepa Dinamani else if (gran == NSEC_PER_SEC)
250450e17c00SDeepa Dinamani t.tv_nsec = 0;
250550e17c00SDeepa Dinamani else if (gran > 1 && gran < NSEC_PER_SEC)
250650e17c00SDeepa Dinamani t.tv_nsec -= t.tv_nsec % gran;
250750e17c00SDeepa Dinamani else
250850e17c00SDeepa Dinamani WARN(1, "invalid file time granularity: %u", gran);
250950e17c00SDeepa Dinamani return t;
251050e17c00SDeepa Dinamani }
251150e17c00SDeepa Dinamani EXPORT_SYMBOL(timestamp_truncate);
251250e17c00SDeepa Dinamani
251350e17c00SDeepa Dinamani /**
25143cd88666SDeepa Dinamani * current_time - Return FS time
25153cd88666SDeepa Dinamani * @inode: inode.
25163cd88666SDeepa Dinamani *
25173cd88666SDeepa Dinamani * Return the current time truncated to the time granularity supported by
25183cd88666SDeepa Dinamani * the fs.
25193cd88666SDeepa Dinamani *
25203cd88666SDeepa Dinamani * Note that inode and inode->sb cannot be NULL.
25213cd88666SDeepa Dinamani * Otherwise, the function warns and returns time without truncation.
25223cd88666SDeepa Dinamani */
current_time(struct inode * inode)252395582b00SDeepa Dinamani struct timespec64 current_time(struct inode *inode)
25243cd88666SDeepa Dinamani {
2525d651d160SArnd Bergmann struct timespec64 now;
2526d651d160SArnd Bergmann
2527d651d160SArnd Bergmann ktime_get_coarse_real_ts64(&now);
252850e17c00SDeepa Dinamani return timestamp_truncate(now, inode);
25293cd88666SDeepa Dinamani }
25303cd88666SDeepa Dinamani EXPORT_SYMBOL(current_time);
25312b3416ceSYang Xu
25322b3416ceSYang Xu /**
25339b6304c1SJeff Layton * inode_set_ctime_current - set the ctime to current_time
25349b6304c1SJeff Layton * @inode: inode
25359b6304c1SJeff Layton *
25369b6304c1SJeff Layton * Set the inode->i_ctime to the current value for the inode. Returns
25379b6304c1SJeff Layton * the current value that was assigned to i_ctime.
25389b6304c1SJeff Layton */
inode_set_ctime_current(struct inode * inode)25399b6304c1SJeff Layton struct timespec64 inode_set_ctime_current(struct inode *inode)
25409b6304c1SJeff Layton {
2541647aa768SChristian Brauner struct timespec64 now = current_time(inode);
25429b6304c1SJeff Layton
2543647aa768SChristian Brauner inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
25449b6304c1SJeff Layton return now;
25459b6304c1SJeff Layton }
25469b6304c1SJeff Layton EXPORT_SYMBOL(inode_set_ctime_current);
25479b6304c1SJeff Layton
25489b6304c1SJeff Layton /**
254911c2a870SChristian Brauner * in_group_or_capable - check whether caller is CAP_FSETID privileged
25509452e93eSChristian Brauner * @idmap: idmap of the mount @inode was found from
255111c2a870SChristian Brauner * @inode: inode to check
255211c2a870SChristian Brauner * @vfsgid: the new/current vfsgid of @inode
255311c2a870SChristian Brauner *
255411c2a870SChristian Brauner * Check wether @vfsgid is in the caller's group list or if the caller is
255511c2a870SChristian Brauner * privileged with CAP_FSETID over @inode. This can be used to determine
255611c2a870SChristian Brauner * whether the setgid bit can be kept or must be dropped.
255711c2a870SChristian Brauner *
255811c2a870SChristian Brauner * Return: true if the caller is sufficiently privileged, false if not.
255911c2a870SChristian Brauner */
in_group_or_capable(struct mnt_idmap * idmap,const struct inode * inode,vfsgid_t vfsgid)25609452e93eSChristian Brauner bool in_group_or_capable(struct mnt_idmap *idmap,
256111c2a870SChristian Brauner const struct inode *inode, vfsgid_t vfsgid)
256211c2a870SChristian Brauner {
256311c2a870SChristian Brauner if (vfsgid_in_group_p(vfsgid))
256411c2a870SChristian Brauner return true;
25659452e93eSChristian Brauner if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
256611c2a870SChristian Brauner return true;
256711c2a870SChristian Brauner return false;
256811c2a870SChristian Brauner }
256911c2a870SChristian Brauner
257011c2a870SChristian Brauner /**
25712b3416ceSYang Xu * mode_strip_sgid - handle the sgid bit for non-directories
25729452e93eSChristian Brauner * @idmap: idmap of the mount the inode was created from
25732b3416ceSYang Xu * @dir: parent directory inode
25742b3416ceSYang Xu * @mode: mode of the file to be created in @dir
25752b3416ceSYang Xu *
25762b3416ceSYang Xu * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
25772b3416ceSYang Xu * raised and @dir has the S_ISGID bit raised ensure that the caller is
25782b3416ceSYang Xu * either in the group of the parent directory or they have CAP_FSETID
25792b3416ceSYang Xu * in their user namespace and are privileged over the parent directory.
25802b3416ceSYang Xu * In all other cases, strip the S_ISGID bit from @mode.
25812b3416ceSYang Xu *
25822b3416ceSYang Xu * Return: the new mode to use for the file
25832b3416ceSYang Xu */
mode_strip_sgid(struct mnt_idmap * idmap,const struct inode * dir,umode_t mode)25849452e93eSChristian Brauner umode_t mode_strip_sgid(struct mnt_idmap *idmap,
25852b3416ceSYang Xu const struct inode *dir, umode_t mode)
25862b3416ceSYang Xu {
25872b3416ceSYang Xu if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
25882b3416ceSYang Xu return mode;
25892b3416ceSYang Xu if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
25902b3416ceSYang Xu return mode;
2591e67fe633SChristian Brauner if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
25922b3416ceSYang Xu return mode;
25932b3416ceSYang Xu return mode & ~S_ISGID;
25942b3416ceSYang Xu }
25952b3416ceSYang Xu EXPORT_SYMBOL(mode_strip_sgid);
2596