/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_dinode.h"
#include "xfs_error.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_fsops.h"
#include "xfs_icache.h"

#include <linux/kthread.h>
#include <linux/freezer.h>

STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
				struct xfs_perag *pag, struct xfs_inode *ip);

/*
 * Allocate and initialise an xfs_inode.
 */
STATIC struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));
	ASSERT(ip->i_ino == 0);

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));

	return ip;
}
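/*
 * RCU callback that frees the xfs_inode structure once the grace period
 * started by xfs_inode_free() has expired.
 */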
STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	kmem_zone_free(xfs_inode_zone, ip);
}

STATIC void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	if (ip->i_itemp) {
		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!xfs_isiflocked(ip));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

/*
 * Check the validity of the inode we just found in the cache.
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}

	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 *	     wait_on_inode to wait for these flags to be cleared
	 *	     instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}

	/*
	 * If lookup is racing with unlink return an error immediately.
	 */
	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_error;
	}

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into usable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode. We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		error = -inode_init_always(mp->m_super, inode);
		if (error) {
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble. Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);

			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
		inode->i_state = I_NEW;

		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
	XFS_STATS_INC(xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}

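/*
 * The inode was not found in the cache: allocate a new in-core inode, read
 * the inode from disk and insert the initialised inode into the per-AG radix
 * tree so that subsequent lookups can find it.
 */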
static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
	int			iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region. Since we can be called from transaction context, don't
	 * recurse into the file system.
	 */
	if (radix_tree_preload(GFP_NOFS)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the cache held in each AG.
 * If the inode is found in the cache, initialise the vfs inode
 * if necessary.
 *
 * If it is not in core, read it in from the file system's device,
 * add it to the cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *		 for xfs_ilock() for a list of valid values.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	xfs_inode_t	*ip;
	int		error;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;

	/*
	 * xfs_reclaim_inode() uses the ILOCK to ensure an inode
	 * doesn't get freed while it's being referenced during a
	 * radix tree traversal here. It assumes this function
	 * acquires only the ILOCK (and therefore it has no need to
	 * involve the IOLOCK in this synchronization).
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return EINVAL;

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		XFS_STATS_INC(xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now. If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
		xfs_setup_inode(ip);
	return 0;

out_error_or_again:
	if (error == EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}

/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 */
#define XFS_LOOKUP_BATCH	32

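/*
 * Take a reference to the VFS inode backing an inode found during an AG walk.
 * Returns 0 if the inode was grabbed, or a positive error code if it should
 * be skipped because it is new, reclaimable, bad, or already being torn down.
 */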
STATIC int
xfs_inode_ag_walk_grab(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(rcu_read_lock_held());

	/*
	 * check for stale RCU freed inode
	 *
	 * If the inode has been reallocated, it doesn't matter if it's not in
	 * the AG we are walking - we are walking for writeback, so if it
	 * passes all the "valid inode" checks and is dirty, then we'll write
	 * it back anyway. If it has been reallocated and is still being
	 * initialised, the XFS_INEW check below will catch it.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!ip->i_ino)
		goto out_unlock_noent;

	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
		goto out_unlock_noent;
	spin_unlock(&ip->i_flags_lock);

	/* nothing to sync during shutdown */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return EFSCORRUPTED;

	/* If we can't grab the inode, it must be on its way to reclaim. */
	if (!igrab(inode))
		return ENOENT;

	if (is_bad_inode(inode)) {
		IRELE(ip);
		return ENOENT;
	}

	/* inode is valid */
	return 0;

out_unlock_noent:
	spin_unlock(&ip->i_flags_lock);
	return ENOENT;
}

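/*
 * Walk all in-core inodes in a single AG in XFS_LOOKUP_BATCH-sized batches
 * and call @execute on each inode that can be grabbed. If any inode returns
 * EAGAIN, the whole walk is restarted after a short delay.
 */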
STATIC int
xfs_inode_ag_walk(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	int			(*execute)(struct xfs_inode *ip,
					   struct xfs_perag *pag, int flags),
	int			flags)
{
	uint32_t		first_index;
	int			last_error = 0;
	int			skipped;
	int			done;
	int			nr_found;

restart:
	done = 0;
	skipped = 0;
	first_index = 0;
	nr_found = 0;
	do {
		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		rcu_read_lock();
		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH);
		if (!nr_found) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Grab the inodes before we drop the lock. If we found
		 * nothing, nr == 0 and the loop will be skipped.
		 */
		for (i = 0; i < nr_found; i++) {
			struct xfs_inode *ip = batch[i];

			if (done || xfs_inode_ag_walk_grab(ip))
				batch[i] = NULL;

			/*
			 * Update the index for the next lookup. Catch
			 * overflows into the next AG range which can occur if
			 * we have inodes in the last block of the AG and we
			 * are currently pointing to the last inode.
			 *
			 * Because we may see inodes that are from the wrong AG
			 * due to RCU freeing and reallocation, only update the
			 * index if it lies in this AG. It was a race that led
			 * us to see this inode, so another lookup from the
			 * same index will not find it again.
			 */
			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
				continue;
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = 1;
		}

		/* unlock now we've grabbed the inodes. */
		rcu_read_unlock();

		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = execute(batch[i], pag, flags);
			IRELE(batch[i]);
			if (error == EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != EFSCORRUPTED)
				last_error = error;
		}

		/* bail out if the filesystem is corrupted. */
		if (error == EFSCORRUPTED)
			break;

		cond_resched();

	} while (nr_found && !done);

	if (skipped) {
		delay(1);
		goto restart;
	}
	return last_error;
}

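/*
 * Apply @execute to every grabbable in-core inode in the filesystem by
 * walking each per-AG inode cache in turn. The iteration stops early on
 * EFSCORRUPTED; otherwise the last error seen is returned.
 */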
int
xfs_inode_ag_iterator(
	struct xfs_mount	*mp,
	int			(*execute)(struct xfs_inode *ip,
					   struct xfs_perag *pag, int flags),
	int			flags)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get(mp, ag))) {
		ag = pag->pag_agno + 1;
		error = xfs_inode_ag_walk(mp, pag, execute, flags);
		xfs_perag_put(pag);
		if (error) {
			last_error = error;
			if (error == EFSCORRUPTED)
				break;
		}
	}
	return XFS_ERROR(last_error);
}

/*
 * Queue a new inode reclaim pass if there are reclaimable inodes and there
 * isn't a reclaim pass already in progress. By default it runs every 5s based
 * on the xfs periodic sync default of 30s. Perhaps this should have its own
 * tunable, but that can be done if this method proves to be ineffective or too
 * aggressive.
 */
static void
xfs_reclaim_work_queue(
	struct xfs_mount	*mp)
{

	rcu_read_lock();
	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
	}
	rcu_read_unlock();
}

/*
 * This is a fast pass over the inode cache to try to get reclaim moving on as
 * many inodes as possible in a short period of time. It kicks itself every few
 * seconds, as well as being kicked by the inode cache shrinker when memory
 * goes low. It scans as quickly as possible avoiding locked inodes or those
 * already being flushed, and once done schedules a future pass.
 */
void
xfs_reclaim_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);

	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
	xfs_reclaim_work_queue(mp);
}

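/*
 * Mark an inode as reclaimable in the per-AG radix tree. Tagging the first
 * reclaimable inode of an AG also tags the AG in the per-mount radix tree
 * and schedules the background reclaim work.
 */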
static void
__xfs_inode_set_reclaim_tag(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);

	if (!pag->pag_ici_reclaimable) {
		/* propagate the reclaim tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		/* schedule periodic background inode reclaim */
		xfs_reclaim_work_queue(ip->i_mount);

		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
	pag->pag_ici_reclaimable++;
}

/*
 * We set the inode flag atomically with the radix tree tag.
 * Once we get tag lookups on the radix tree, this inode flag
 * can go away.
 */
void
xfs_inode_set_reclaim_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	spin_lock(&ip->i_flags_lock);
	__xfs_inode_set_reclaim_tag(pag, ip);
	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
	spin_unlock(&ip->i_flags_lock);
	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

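/*
 * Account for an inode leaving the reclaimable state. When the last
 * reclaimable inode in the AG goes away, clear the reclaim tag for this AG
 * from the per-mount radix tree.
 */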
STATIC void
__xfs_inode_clear_reclaim(
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	pag->pag_ici_reclaimable--;
	if (!pag->pag_ici_reclaimable) {
		/* clear the reclaim tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				XFS_ICI_RECLAIM_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
							-1, _RET_IP_);
	}
}

STATIC void
__xfs_inode_clear_reclaim_tag(
	xfs_mount_t	*mp,
	xfs_perag_t	*pag,
	xfs_inode_t	*ip)
{
	radix_tree_tag_clear(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
	__xfs_inode_clear_reclaim(pag, ip);
}

/*
 * Grab the inode for reclaim exclusively.
 * Return 0 if we grabbed it, non-zero otherwise.
 */
STATIC int
xfs_reclaim_inode_grab(
	struct xfs_inode	*ip,
	int			flags)
{
	ASSERT(rcu_read_lock_held());

	/* quick check for stale RCU freed inode */
	if (!ip->i_ino)
		return 1;

	/*
	 * If we are asked for non-blocking operation, do unlocked checks to
	 * see if the inode already is being flushed or in reclaim to avoid
	 * lock traffic.
	 */
	if ((flags & SYNC_TRYLOCK) &&
	    __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
		return 1;

	/*
	 * The radix tree lock here protects a thread in xfs_iget from racing
	 * with us starting reclaim on the inode. Once we have the
	 * XFS_IRECLAIM flag set it will not touch us.
	 *
	 * Due to RCU lookup, we may find inodes that have been freed and only
	 * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
	 * aren't candidates for reclaim at all, so we must check that
	 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
	 */
	spin_lock(&ip->i_flags_lock);
	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* not a reclaim candidate. */
		spin_unlock(&ip->i_flags_lock);
		return 1;
	}
	__xfs_iflags_set(ip, XFS_IRECLAIM);
	spin_unlock(&ip->i_flags_lock);
	return 0;
}

/*
 * Inodes in different states need to be treated differently. The following
 * table lists the inode states and the reclaim actions necessary:
 *
 *	inode state		iflush ret	required action
 *	---------------		----------	---------------
 *	bad			-		reclaim
 *	shutdown		EIO		unpin and reclaim
 *	clean, unpinned		0		reclaim
 *	stale, unpinned		0		reclaim
 *	clean, pinned(*)	0		requeue
 *	stale, pinned		EAGAIN		requeue
 *	dirty, async		-		requeue
 *	dirty, sync		0		reclaim
 *
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
 *
 * Note that because the inode is flushed delayed write by AIL pushing, the
 * flush lock may already be held here and waiting on it can result in very
 * long latencies. Hence for sync reclaims, where we wait on the flush lock,
 * the caller should push the AIL first before trying to reclaim inodes to
 * minimise the amount of time spent waiting. For background reclaim, we only
 * bother to reclaim clean inodes anyway.
 *
 * Hence the order of actions after gaining the locks should be:
 *	bad		=> reclaim
 *	shutdown	=> unpin and reclaim
 *	pinned, async	=> requeue
 *	pinned, sync	=> unpin
 *	stale		=> reclaim
 *	clean		=> reclaim
 *	dirty, async	=> requeue
 *	dirty, sync	=> flush, wait and reclaim
 */
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	int			error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;
		xfs_iflock(ip);
	}

	if (is_bad_inode(VFS_I(ip)))
		goto reclaim;
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		xfs_iflush_abort(ip, false);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out_ifunlock;
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE))
		goto reclaim;
	if (xfs_inode_clean(ip))
		goto reclaim;

	/*
	 * Never flush out dirty data during non-blocking reclaim, as it would
	 * just contend with AIL pushing trying to do the same job.
	 */
	if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;

	/*
	 * Now we have an inode that needs flushing.
	 *
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here. As a result,
	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
	 * result in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluster() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it. Hence if we get an EAGAIN error here, just unlock the
	 * inode, back off and try again. Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, &bp);
	if (error == EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

	xfs_iflock(ip);
reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree assert that it's been there before to catch
	 * problems with the inode life time early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
		ASSERT(0);
	__xfs_inode_clear_reclaim(pag, ip);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups. This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_inode_free(ip);
	return error;

out_ifunlock:
	xfs_ifunlock(ip);
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * We could return EAGAIN here to make reclaim rescan the inode tree in
	 * a short while. However, this just burns CPU time scanning the tree
	 * waiting for IO to complete and the reclaim work never goes back to
	 * the idle state. Instead, return 0 to let the next scheduled
	 * background reclaim attempt to reclaim the inode again.
	 */
	return 0;
}

/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shut down during filesystem unmount reclaim walk will leak all the
 * unreclaimed inodes.
 */
STATIC int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	int			trylock = flags & SYNC_TRYLOCK;
	int			skipped;

restart:
	ag = 0;
	skipped = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		if (trylock) {
			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
				skipped++;
				xfs_perag_put(pag);
				continue;
			}
			first_index = pag->pag_ici_reclaim_cursor;
		} else
			mutex_lock(&pag->pag_ici_reclaim_lock);

		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			/*
			 * Grab the inodes before we drop the lock. If we found
			 * nothing, nr == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || xfs_reclaim_inode_grab(ip, flags))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that led us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (!batch[i])
					continue;
				error = xfs_reclaim_inode(batch[i], pag, flags);
				if (error && last_error != EFSCORRUPTED)
					last_error = error;
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;

			cond_resched();

		} while (nr_found && !done && *nr_to_scan > 0);

		if (trylock && !done)
			pag->pag_ici_reclaim_cursor = first_index;
		else
			pag->pag_ici_reclaim_cursor = 0;
		mutex_unlock(&pag->pag_ici_reclaim_lock);
		xfs_perag_put(pag);
	}

	/*
	 * If we skipped any AG, and we still have scan count remaining, do
	 * another pass this time using blocking reclaim semantics (i.e.
	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
	 * ensures that when we get more reclaimers than AGs we block rather
	 * than spin trying to execute reclaim.
	 */
	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
	return XFS_ERROR(last_error);
}

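/*
 * Reclaim every reclaimable inode in the filesystem, using the blocking
 * behaviour requested by @mode.
 */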
int
xfs_reclaim_inodes(
	xfs_mount_t	*mp,
	int		mode)
{
	int		nr_to_scan = INT_MAX;

	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}

/*
 * Scan a certain number of inodes for reclaim.
 *
 * When called we make sure that there is a background (fast) inode reclaim in
 * progress, while we will throttle the speed of reclaim via doing synchronous
 * reclaim of inodes. That means if we come across dirty inodes, we wait for
 * them to be cleaned, which we hope will not be very long due to the
 * background walker having already kicked the IO off on those dirty inodes.
 */
void
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
}

/*
 * Return the number of reclaimable inodes in the filesystem for
 * the shrinker to determine how much to reclaim.
 */
int
xfs_reclaim_inodes_count(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;
	int			reclaimable = 0;

	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		ag = pag->pag_agno + 1;
		reclaimable += pag->pag_ici_reclaimable;
		xfs_perag_put(pag);
	}
	return reclaimable;
}

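/*
 * Tag an inode in the per-AG radix tree as having allocated blocks beyond EOF
 * that are candidates for freeing. The first tagged inode in an AG also
 * propagates the EOFBLOCKS tag up into the per-mount radix tree so that
 * tagged AGs can be found quickly.
 */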
void
xfs_inode_set_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;
	int tagged;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_set_eofblocks_tag(ip);

	tagged = radix_tree_tagged(&pag->pag_ici_root,
				   XFS_ICI_EOFBLOCKS_TAG);
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_EOFBLOCKS_TAG);
	if (!tagged) {
		/* propagate the eofblocks tag up into the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
				   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				   XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);

		trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
					      -1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}

void
xfs_inode_clear_eofblocks_tag(
	xfs_inode_t	*ip)
{
	struct xfs_mount *mp = ip->i_mount;
	struct xfs_perag *pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
	spin_lock(&pag->pag_ici_lock);
	trace_xfs_inode_clear_eofblocks_tag(ip);

	radix_tree_tag_clear(&pag->pag_ici_root,
			     XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			     XFS_ICI_EOFBLOCKS_TAG);
	if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
		/* clear the eofblocks tag from the perag radix tree */
		spin_lock(&ip->i_mount->m_perag_lock);
		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
				     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
				     XFS_ICI_EOFBLOCKS_TAG);
		spin_unlock(&ip->i_mount->m_perag_lock);
		trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
						-1, _RET_IP_);
	}

	spin_unlock(&pag->pag_ici_lock);
	xfs_perag_put(pag);
}